In [8]:
from dotenv import load_dotenv
from pprint import pprint

# Load environment variables from the .env file at ../.env (relative to the
# current working directory).
# NOTE(review): presumably this provides the OpenAI credentials for the client
# created later in the notebook — confirm.
load_dotenv('../.env')

True

In [6]:
# Model name passed as `model=` to the chat-completion parse calls.
OPENAI_MODEL="gpt-4o"

# Directories holding the sample inputs; both paths are relative to the
# current working directory.
JOB_SAMPLES = "../data/job_samples"
CV_SAMPLES = "../data/cv_samples"

In [11]:
def get_job_description(file_name: str) -> str:
    """Return the full text of a job description stored under ``JOB_SAMPLES``.

    Args:
        file_name: Name of the file inside the ``JOB_SAMPLES`` directory.

    Returns:
        The complete contents of the file as one string.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8: without `encoding=` Python falls back to the
    # platform default, which mis-decodes or rejects non-ASCII text on systems
    # whose locale is not UTF-8.
    with open(f"{JOB_SAMPLES}/{file_name}", "r", encoding="utf-8") as file:
        return file.read()


In [27]:
from PyPDF2 import PdfReader

def get_cv_text_from_pdf(file_name: str):
    """Extract the text of every page of a PDF located under ``CV_SAMPLES``.

    Args:
        file_name: Name of the PDF file inside the ``CV_SAMPLES`` directory.

    Returns:
        The extracted text of all pages in order, each page followed by a
        newline character.
    """
    pdf_path = f"{CV_SAMPLES}/{file_name}"
    pdf_pages = PdfReader(pdf_path).pages
    # Every page contributes its text plus one trailing newline.
    return "".join(pdf_page.extract_text() + "\n" for pdf_page in pdf_pages)



In [31]:
from pydantic import BaseModel, Field


class JobRequirements(BaseModel):
    # Structured representation of a job posting; passed as `response_format`
    # when extracting requirements from a job description.
    # NOTE: documented with comments rather than a class docstring, because
    # pydantic copies a class docstring into the generated JSON schema.

    # Role title and seniority.
    position: str = Field(
        description="The job position (e.g. 'Backend Developer')",
    )
    position_level: str = Field(
        description="The level of the job position (e.g. 'Senior, Middle, Junior, Unknown')",
    )

    # Skills and free-form requirements.
    technical_skills: list[str] = Field(
        description="The technical skills required for the job, mark 'or' for alternatives (e.g. ['Python or Kotlin or C#', 'Django', 'PostgreSQL'])",
    )
    requirements: list[str] = Field(
        description="All key requirements for the job as a list of strings (e.g.['At least 3 years of experience developing software applications using (Golang/Java/Kotlin/C#/C++)', 'Excellent problem-solving and communication skills.'])",
    )

    # Experience, education and languages.
    experience_years: int = Field(
        description="The number of years of experience required for the job (e.g. 3)",
    )
    education: str = Field(
        description="The education required for the job: level and field of study (e.g. 'Bachelor of Science in Computer Science')",
    )
    language_skills: str = Field(
        description="The language skills and level of proficiency (e.g. 'English, Native') ",
    )
    

class CVFeatures(BaseModel):
    # Structured representation of a candidate CV; passed as `response_format`
    # when extracting features from CV text.
    # NOTE: documented with comments rather than a class docstring, because
    # pydantic copies a class docstring into the generated JSON schema.

    # Candidate identity and contact details.
    name: str = Field(
        description="The full name of the CV (e.g. 'John Doe')",
    )
    email: str = Field(
        description="The email of the CV (e.g. 'john.doe@example.com')",
    )
    location: str = Field(
        description="The location of the CV (e.g. 'London, UK')",
    )

    # Role title, seniority and length of experience.
    position: str = Field(
        description="The job position from the CV (e.g. 'Backend Developer')",
    )
    position_level: str = Field(
        description="The level of the job position from the CV (e.g. 'Senior, Middle, Junior, Unknown')",
    )
    experience_years: int = Field(
        description="The number of years of experience from the CV on the position (e.g. 3)",
    )

    # Skills, work history and accomplishments.
    technical_skills: list[str] = Field(
        description="The technical skills from the CV on the position (e.g. ['Python', 'Django', 'PostgreSQL'])",
    )
    experience: list[str] = Field(
        description="The experience from the CV on the position",
    )
    achievements: list[str] = Field(
        description="The particular achievements from the CV on the position",
    )

    # Education and languages.
    education: list[str] = Field(
        description="The education from the CV",
    )
    language_skills: list[str] = Field(
        description="The language skills and level of proficiency from the CV on the position (e.g. ['English intermediate', 'Russian native'])",
    )


In [32]:
from openai import OpenAI
# Module-level API client shared by the extraction functions.
# NOTE(review): created without arguments, so the API key presumably comes
# from the environment — confirm.
client = OpenAI()


def _request_structured_output(system_prompt: str, user_content: str, response_format):
    """Run one system+user chat exchange and return the parsed structured reply.

    Args:
        system_prompt: Instructions sent as the system message.
        user_content: Document text sent as the user message.
        response_format: Model class describing the expected output structure.

    Returns:
        The parsed instance of ``response_format`` produced by the model.

    Raises:
        RuntimeError: If the model refused the request or returned no parsed
            content.
    """
    response = client.beta.chat.completions.parse(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        response_format=response_format,
    )

    message = response.choices[0].message

    # A refusal leaves `parsed` empty; fail loudly instead of handing `None`
    # to the caller, where it would only surface later as an attribute error.
    refusal = getattr(message, "refusal", None)
    if refusal:
        raise RuntimeError(f"Model refused to produce structured output: {refusal}")
    if message.parsed is None:
        raise RuntimeError("Model response did not contain parsed structured output.")

    return message.parsed


def extract_job_requirements(job_description: str) -> "JobRequirements":
    """Extract structured requirements from the text of a job description.

    Args:
        job_description: Raw text of the job posting.

    Returns:
        A ``JobRequirements`` instance filled in by the model.

    Raises:
        RuntimeError: If the model refused or returned no parsed content.
    """
    system_prompt = ("You are an assistant with the task of extracting precise information from job description. "
                     "You will be prompted with the contents of a job description. Your task is to extract requirements "
                     "for the job position, position level, technical skills, experience years, education, language skills "
                     "and other requirements from this job description. "
                     "\n\n"
                     "Do your best to include as many requirements as possible!")

    return _request_structured_output(system_prompt, job_description, JobRequirements)


def extract_cv_features(cv_text: str) -> "CVFeatures":
    """Extract structured candidate features from the text of a CV.

    Args:
        cv_text: Raw text of the CV.

    Returns:
        A ``CVFeatures`` instance filled in by the model.

    Raises:
        RuntimeError: If the model refused or returned no parsed content.
    """
    # The closing sentence previously asked for "requirements" (copied from the
    # job-description prompt); a CV has details/features, not requirements.
    system_prompt = ("You are an assistant with the task of extracting precise information from CV. "
                     "You will be prompted with the contents of a CV. Your task is to extract features "
                     "for the CV: name, email, location, position, position level, experience years, technical skills, "
                     "experience, achievements, education, language skills. "
                     "\n\n"
                     "Do your best to include as many details as possible!")

    return _request_structured_output(system_prompt, cv_text, CVFeatures)
                         
    

In [21]:
# Load the sample job description 'job1.txt' and print its raw text.
descr = get_job_description('job1.txt')
print(descr)


Backend Developer (Golang)
RemoteSMEFull time

Limassol, Limassol, Cyprus
Almaty, Almaty Region, Kazakhstan
Overview
Application
Description
We are looking for a Backend Software Engineer (Golang) to join our team at our European fintech startup. Our startup serves more than 500,000 customers across Europe and develops banking products for customers having 500+ microservices on board.

The ideal candidate for this role will be a passionate and knowledgeable engineer who is comfortable developing new features and services as well as maintaining existing codebase. The candidate should have a strong background in software engineering and computer science with a particular focus on developing robust and reliable solutions.

We are using a modern stack of technologies and platforms like PostgreSQL, Kafka, AWS, Kubernetes, Gitlab CI, Prometheus, Grafana.

Responsibilities:

Develop banking microservices for SME products.
Participate in code reviews and provide feedback to other team members.

In [22]:
# Extract structured requirements from the loaded job description and print
# the resulting model.
job_requirements = extract_job_requirements(descr)
print(job_requirements)

position='Backend Developer (Golang)' position_level='Middle/Senior' technical_skills=['Golang or Java or Kotlin or C# or C++', 'PostgreSQL', 'Kafka', 'AWS', 'Kubernetes', 'Gitlab CI', 'Prometheus', 'Grafana'] requirements=['Bachelor’s degree in Computer Science or related field.', 'At least 3 years of experience developing software applications using (Golang/Java/Kotlin/C#/C++)', 'Excellent problem-solving and communication skills.', 'Ability to work in a fast-paced, dynamic environment.', 'Experience in developing banking microservices and applications will be a huge advantage.'] experience_years=3 education='Bachelor’s degree in Computer Science or related field' language_skills='English, proficient'


In [29]:
# Extract the text of the sample CV 'cv1.pdf' and print it for inspection.
cv_text = get_cv_text_from_pdf('cv1.pdf')
print(cv_text)

dtforweb@gmail.com
linkedin.com/in/dtymoshchenko/Dmitriy Tymoshchenko
Fullstack developerWarsaw - Poland
380671538777 UA / 48502151534 PL


 - 
Worked on projects: 
1) Travel Domain - Development and optimization of a travel agency platform focusing on performance and user
experience enhancements. Responsibilities included A/B testing, writing unit and e2e tests with Cypress, and optimizing
Core Web Vitals to ensure top-tier performance. Developed a custom caching service for data retrieval, wrote extensive
documentation for application modules, and collaborated with cross-functional teams to improve site reliability and
functionality.
Company: Travelshift, project Guidetoeurope.com
Technologies: React, Next.js, Node.js, Cypress, Typescript, Webpack, GraphQL, GCP
2) E-commerce Data Parsing - Developed and maintained a service to parse and process merchant emails, providing
structured data for integration into internal systems. Collaborated with engineering teams to create and optim

In [33]:
# Extract structured features from the current `cv_text` and pretty-print them.
cv_features = extract_cv_features(cv_text)
pprint(cv_features)

CVFeatures(name='Dmitriy Tymoshchenko', email='dtforweb@gmail.com', location='Warsaw, Poland', position='Fullstack Developer', position_level='Senior', experience_years=7, technical_skills=['React', 'Next.js', 'Node.js', 'Cypress', 'Typescript', 'Webpack', 'GraphQL', 'GCP', 'react-hook-form', 'MUI', 'Vitest', 'PostgreSQL', 'Prisma ORM', 'Redux', 'Redux-saga', 'Sass', 'storybook', 'Jest', 'Docker', 'MongoDB', 'Vue', 'Blitz.js', 'Vite', 'React-native', 'MaterialUI', 'Redux-thunk', 'React-router', 'MobX', 'Angular', 'HighCharts.js', 'D3.js', 'OOP', 'FP', 'Design Patterns', 'Express.js', 'Redis', 'Sequelize', 'Bull-mq', 'Kubernetes', 'AWS', 'Azure', 'Git', 'Gulp', 'WebSockets', 'SCSS', 'tailwind.css', 'styled-components', 'Materialize-UI', 'React Testing Library'], experience=['Worked on Travel Domain project at Travelshift, enhancing performance and user experience, developing a custom caching service, and collaborating with teams.', 'Developed and maintained an e-commerce data parsing se

In [34]:
# Extract the text of the sample CV 'cv3.pdf' and print it for inspection.
# NOTE: this rebinds `cv_text`, replacing the text loaded from 'cv1.pdf'.
cv_text = get_cv_text_from_pdf('cv3.pdf')
print(cv_text)

Кутепов Максим Владимирович
Мужчина, 24 года, родился 7 сентября 2000
+7 (902) 5846757  — предпочитаемый способ связи
xydensgm@gmail.com
Проживает: Екатеринбург
Гражданство: Россия, есть разрешение на работу: Россия
Готов к переезду, не готов к командировкам
Желаемая должность и зарплата
Golang разработчик
Специализации:
—  Программист, разработчик
Занятость: полная занятость
График работы: полный день
Желательное время в пути до работы: не имеет значения
Опыт работы — 7 лет 3 месяца
Май 2024 —
настоящее время
7 месяцевITEZ
Node.js разработчик
Проект - b2c и b2b обменник криптовалюты, кастодиальный кошелёк. Разрабатывали систему с
широким набором интеграций с различными платежными системами и блокчейнами.
- Разрабатывал сервиc интеграций с палатежными системами на NodeJS и NextJS
- Создал систему анализа транзакций и балансов кошельков на базе Clickhouse, Kafka, Kafka
Connect и NestJS
- Автоматизировал интеграцинонное тестирование перечисленных систем
Технологии и инструменты: TypeScri

In [35]:
# Extract structured features from the current `cv_text` and pretty-print them.
# NOTE: this rebinds `cv_features`, replacing the result of the earlier run.
cv_features = extract_cv_features(cv_text)
pprint(cv_features)

CVFeatures(name='Кутепов Максим Владимирович', email='xydensgm@gmail.com', location='Екатеринбург', position='Golang разработчик', position_level='Senior', experience_years=7, technical_skills=['Golang', 'JavaScript', 'Node.js', 'Docker', 'Go', 'React', 'TypeScript', 'NestJS', 'RabbitMQ', 'Gitlab', 'Backend', 'PostgreSQL', 'Linux', 'Clickhouse', 'VueJS', 'Kubernetes', 'Микросервисная архитектура', 'AWS', 'PHP', 'SQL', 'Apache Kafka', 'Frontend', 'Grafana', 'Agile', 'Scrum', 'Blockchain', 'NoSQL', 'Helm', 'Terraform'], experience=['Май 2024 - ITEZ - Node.js разработчик: Разработка b2c и b2b обменника криптовалюты и кастодиального кошелька', "Ноябрь 2021 - Апрель 2024 ООО 'ОктаКод' / ООО 'ЦЕНТР РАЗРАБОТКИ СИЭЕКСАЙО РУС': Backend developer / Tech Lead - Разработка криптовалютного маржинального брокера", 'Январь 2021 - Ноябрь 2021 BotHelp: Node.js Tech Lead - Разработка платформы для no-code создания ботов', "Декабрь 2019 - Декабрь 2020 ООО 'ТриниДата': Full-Stack разработчик - Создание we

In [None]:
1. Соответствие специализации позиции в вакансии
 - соответствие профессии (области) позиции в вакансии

2. Совпадение технических навыков и компетенций:
 - языки программирования, фреймворки, командная работа, лидерство
 - соответствие навыков требованиям, указанным в вакансии

3. Количество полных лет опыта в области, требуемой в вакансии

4. Соответствие опыта работы требованиям вакансии

5. Наличие конкретных достижений в опыте, требуемых в вакансии

6. Соответствие уровня образования требованиям в вакансии

7. Знание языков, требуемых в вакансии, и уровень владения языками