In [8]:
from dotenv import load_dotenv
from pprint import pprint

# Load variables from the .env file one directory above the notebook into the
# process environment. NOTE(review): presumably this is where the OpenAI
# credentials used further down come from — confirm.
load_dotenv('../.env')

True

In [6]:
# Chat model name passed to every OpenAI call in this notebook.
OPENAI_MODEL="gpt-4o"

# Directories holding the sample inputs; both are relative paths, so they
# resolve against the directory the notebook is run from.
JOB_SAMPLES = "../data/job_samples"
CV_SAMPLES = "../data/cv_samples"

In [11]:
def get_job_description(file_name: str) -> str:
    """Read a sample job description from the ``JOB_SAMPLES`` directory.

    Args:
        file_name: Name of the file inside ``JOB_SAMPLES`` (e.g. ``'job2.txt'``).

    Returns:
        The full file content as a single string.

    Raises:
        FileNotFoundError: If the file does not exist.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # The samples contain non-ASCII (Cyrillic) text. Without an explicit
    # encoding, open() falls back to the platform default, which is not UTF-8
    # everywhere and would fail or garble the text there.
    with open(f"{JOB_SAMPLES}/{file_name}", "r", encoding="utf-8") as file:
        return file.read()


In [27]:
from PyPDF2 import PdfReader

def get_cv_text_from_pdf(file_name: str):
    """Return the text of a sample CV stored as a PDF in ``CV_SAMPLES``.

    Every page's extracted text is followed by a newline, and the pages are
    concatenated in document order.

    Args:
        file_name: Name of the PDF inside ``CV_SAMPLES`` (e.g. ``'cv3.pdf'``).

    Returns:
        The concatenated page texts as one string.
    """
    pdf = PdfReader(f"{CV_SAMPLES}/{file_name}")
    return "".join(page.extract_text() + "\n" for page in pdf.pages)



In [31]:
from pydantic import BaseModel, Field


# Schema for the requirements extracted from a job posting. It is handed to the
# OpenAI call in extract_job_requirements as `response_format`, and an instance
# of it is what that function returns.
# NOTE(review): the Field descriptions are presumably forwarded to the model as
# part of the schema, so treat them as prompt text, not just documentation.
# NOTE(review): `#` comments are used instead of a class docstring on the
# assumption that pydantic would copy a docstring into the generated schema —
# confirm before converting.
class JobRequirements(BaseModel):
    # Title and seniority of the advertised role.
    position: str = Field(description="The job position (e.g. 'Backend Developer')")
    position_level: str = Field(description="The level of the job position (e.g. 'Senior, Middle, Junior, Unknown')")
    # Alternatives are folded into a single list entry joined with 'or'.
    technical_skills: list[str] = Field(description="The technical skills required for the job, mark 'or' for alternatives (e.g. ['Python or Kotlin or C#', 'Django', 'PostgreSQL'])")
    # Free-text requirements, one list entry per requirement.
    requirements: list[str] = Field(description="All key requirements for the job as a list of strings (e.g.['At least 3 years of experience developing software applications using (Golang/Java/Kotlin/C#/C++)', 'Excellent problem-solving and communication skills.'])")
    experience_years: int = Field(description="The number of years of experience required for the job (e.g. 3)")
    # Unlike CVFeatures, education and language skills are single strings here.
    education: str = Field(description="The education required for the job: level and field of study (e.g. 'Bachelor of Science in Computer Science')")
    language_skills: str = Field(description="The language skills and level of proficiency (e.g. 'English, Native') ")
    

# Schema for the candidate facts extracted from a CV. It is handed to the OpenAI
# call in extract_cv_features as `response_format`, and an instance of it is
# what that function returns.
# NOTE(review): the Field descriptions are presumably forwarded to the model as
# part of the schema, so treat them as prompt text, not just documentation.
class CVFeatures(BaseModel):
    # Contact / identity details of the candidate.
    name: str = Field(description="The full name of the CV (e.g. 'John Doe')")
    email: str = Field(description="The email of the CV (e.g. 'john.doe@example.com')")
    location: str = Field(description="The location of the CV (e.g. 'London, UK')")
    # Role the candidate presents themselves for, and its seniority.
    position: str = Field(description="The job position from the CV (e.g. 'Backend Developer')")
    position_level: str = Field(description="The level of the job position from the CV (e.g. 'Senior, Middle, Junior, Unknown')")
    experience_years: int = Field(description="The number of years of experience from the CV on the position (e.g. 3)")
    technical_skills: list[str] = Field(description="The technical skills from the CV on the position (e.g. ['Python', 'Django', 'PostgreSQL'])")
    # Free-text lists, one entry per item.
    experience: list[str] = Field(description="The experience from the CV on the position")
    achievements: list[str] = Field(description="The particular achievements from the CV on the position")
    # Lists here, whereas JobRequirements uses single strings for the same topics.
    education: list[str] = Field(description="The education from the CV")
    language_skills: list[str] = Field(description="The language skills and level of proficiency from the CV on the position (e.g. ['English intermediate', 'Russian native'])") 


In [32]:
from openai import OpenAI
# Single module-level client shared by all extraction/comparison functions below.
# NOTE(review): no credentials are passed explicitly; presumably they are read
# from the environment populated by load_dotenv — confirm.
client = OpenAI()


def extract_job_requirements(job_description: str):
    """Extract structured requirements from the text of a job posting.

    The text is sent as the user message to the chat model named by
    ``OPENAI_MODEL``, with ``JobRequirements`` given as the response format.

    Args:
        job_description: Full text of the job posting.

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    instructions = (
        "You are an assistant with the task of extracting precise information from job description. "
        "You will be prompted with the contents of a job description. Your task is to extract requirements "
        "for the job position, position level, technical skills, experience years, education, language skills "
        "and other requirements from this job description. "
        "\n\n"
        "Do your best to include as many requirements as possible!"
    )

    completion = client.beta.chat.completions.parse(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": job_description},
        ],
        response_format=JobRequirements,
    )

    first_choice = completion.choices[0]
    return first_choice.message.parsed


def extract_cv_features(cv_text: str):
    """Extract structured candidate features from the text of a CV.

    The text is sent as the user message to the chat model named by
    ``OPENAI_MODEL``, with ``CVFeatures`` given as the response format.

    Args:
        cv_text: Plain text of the CV (e.g. from ``get_cv_text_from_pdf``).

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    # The closing sentence used to say "as many requirements as possible", a
    # leftover from the job-description prompt. A CV has features, not
    # requirements, so the instruction now matches the task described above it.
    system_prompt = ("You are an assistant with the task of extracting precise information from CV. "
                     "You will be prompted with the contents of a CV. Your task is to extract features "
                     "for the CV: name, email, location, position, position level, experience years, technical skills, "
                     "experience, achievements, education, language skills. "
                     "\n\n"
                     "Do your best to include as many features as possible!")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": cv_text},
    ]

    response = client.beta.chat.completions.parse(
        model=OPENAI_MODEL,
        messages=messages,
        response_format=CVFeatures,
    )

    return response.choices[0].message.parsed
                         
    

In [52]:
# Schema for the per-category result of comparing a CV with a job. It is handed
# to the OpenAI call in compare_cv_with_job as `response_format`, and its fields
# are read by total_match_score.
# Every field is an int described as 0-10, except experience_years_match, which
# is a bool.
# NOTE(review): the 0-10 range appears only in the description text; the fields
# carry no validation constraint, so out-of-range values are not rejected here.
class ComparisonResult(BaseModel):
    position_match: int = Field(description="The score for the position name match. Be careful, some position has different names but same meaning (0-10)")
    technical_skills_match: int = Field(description="The score for the technical skills match. How many required skills are present in the CV (0-10)")
    # Boolean, not a 0-10 score, despite the wording "The score for ...".
    experience_years_match: bool = Field(description="The score for the experience years match. True if the experience years are equal or greater than required, otherwise False")
    experience_match: int = Field(description="The score for the experience match. How many experience requirements are present in the CV (0-10)")
    achievements_match: int = Field(description="The score for the achievements match. How many achievements from the CV are associated with the job requirements (0-10)")
    education_match: int = Field(description="The score for the education match. How fully one of the education from the CV is matched with the job education requirements: education level and field of study (0-10)")
    language_skills_match: int = Field(description="The score for the language skills match. How many language skills from the CV are matched with the job language skills requirements (0-10)")


def compare_cv_with_job(cv_features: CVFeatures, job_requirements: JobRequirements):
    """Ask the chat model to score a CV against a job, category by category.

    Both inputs are interpolated into the user message through an f-string, so
    the model sees their string form. ``ComparisonResult`` is given as the
    response format.

    Args:
        cv_features: Features previously extracted from the CV.
        job_requirements: Requirements previously extracted from the job posting.

    Returns:
        The ``parsed`` attribute of the first choice's message.
    """
    instructions = (
        "You are an assistant with the task of comparing CV with job requirements. "
        "You will be prompted with the CV features and job requirements. Your task is to compare "
        "the CV with the job requirements and return the score for each category. "
        "The score is a number between 0 and 10. "
        "The score is calculated based on the following criteria: "
        "1. Position match: 0-10, how well the position from the CV matches the position from the job requirements "
        "2. Technical skills match: 0-10, how many required skills are present in the CV "
        "3. Experience years match: True/False, if the experience years from the CV are equal or greater than required "
        "4. Experience match: 0-10, how many experience requirements are present in the CV "
        "5. Achievements match: 0-10, how many achievements from the CV are associated with the job requirements "
        "6. Education match: 0-10, how fully one of the education from the CV is matched with the job education requirements: education level and field of study "
        "7. Language skills match: 0-10, how many language skills from the CV are matched with the job language skills requirements "
        "Return the score for each category as a dictionary with the category name as the key and the score as the value. "
        "For example: {'position_match': 8, 'technical_skills_match': 9, 'experience_years_match': True, 'experience_match': 7, 'achievements_match': 6, 'education_match': 8, 'language_skills_match': 7}"
    )

    completion = client.beta.chat.completions.parse(
        model=OPENAI_MODEL,
        messages=[
            {"role": "system", "content": instructions},
            {
                "role": "user",
                "content": f"CV features: {cv_features}\nJob requirements: {job_requirements}",
            },
        ],
        response_format=ComparisonResult,
    )

    first_choice = completion.choices[0]
    return first_choice.message.parsed

                



In [42]:
# Load one sample job posting and show its raw text.
descr = get_job_description('job2.txt')
print(descr)


Backend Developer
Golang


Remotely
Worldwide
Привет! Мы — Space307, международная финтех-компания полного цикла. В нашей команде 280+ сотрудников в области разработки и маркетинга.

Мы топим за адекватность и рационализм, работаем в кросс-функциональных продуктовых командах, любим совместные тусовки и не любим бюрократию.
 Мы ищем коллегу, Go-разработчика, для того чтобы создавать и развивать продуктовые сервисы финансовой компании в составе кросс-функциональной команды.
Чем предстоит заниматься:
Разрабатывать и поддерживать микросервисы на Go
Участвовать в планировании и доведении задач до результата
Брать ответственность за работоспособность сервисов и принятием участия в их эксплуатации.
Ключевые теги проекта:
Go, WebSocket, MySQL, Redis, Kafka, RabbitMQ, Clickhouse, Cassandra.

Мы ждём, что ты:
Имеешь опыт разработки на golang от трёх лет
Обладаешь опытом написания сложных SQL-запросов (MySQL, ClickHouse, Vertica)
Обладаешь опытом работы с очередями и брокерами сообщений (RabbitMQ

In [43]:
# Turn the raw posting text (`descr`, loaded in an earlier cell) into a
# JobRequirements object and show it.
job_requirements = extract_job_requirements(descr)
print(job_requirements)

position='Backend Developer' position_level='Unknown' technical_skills=['Golang', 'WebSocket', 'MySQL', 'Redis', 'Kafka or RabbitMQ', 'Clickhouse', 'Cassandra', 'Docker or Kubernetes', 'Python or PHP or JS'] requirements=['Experience in Golang development for at least 3 years', 'Experience writing complex SQL queries (MySQL, ClickHouse, Vertica)', 'Experience with message queues and brokers (RabbitMQ, Kafka)'] experience_years=3 education='Unknown' language_skills='Unknown'


In [68]:
# Load one sample CV from PDF and show its extracted text.
# NOTE(review): the printed output contains personal data (name, phone number,
# email) — clear or redact it before sharing or committing the notebook.
cv_text = get_cv_text_from_pdf('cv3.pdf')
print(cv_text)

Кутепов Максим Владимирович
Мужчина, 24 года, родился 7 сентября 2000
+7 (902) 5846757  — предпочитаемый способ связи
xydensgm@gmail.com
Проживает: Екатеринбург
Гражданство: Россия, есть разрешение на работу: Россия
Готов к переезду, не готов к командировкам
Желаемая должность и зарплата
Golang разработчик
Специализации:
—  Программист, разработчик
Занятость: полная занятость
График работы: полный день
Желательное время в пути до работы: не имеет значения
Опыт работы — 7 лет 3 месяца
Май 2024 —
настоящее время
7 месяцевITEZ
Node.js разработчик
Проект - b2c и b2b обменник криптовалюты, кастодиальный кошелёк. Разрабатывали систему с
широким набором интеграций с различными платежными системами и блокчейнами.
- Разрабатывал сервиc интеграций с палатежными системами на NodeJS и NextJS
- Создал систему анализа транзакций и балансов кошельков на базе Clickhouse, Kafka, Kafka
Connect и NestJS
- Автоматизировал интеграцинонное тестирование перечисленных систем
Технологии и инструменты: TypeScri

In [69]:
# Turn the raw CV text (`cv_text`, loaded in an earlier cell) into a CVFeatures
# object and pretty-print it.
# NOTE(review): the printed output includes the candidate's name and email —
# clear or redact it before sharing the notebook.
cv_features = extract_cv_features(cv_text)
pprint(cv_features)

CVFeatures(name='Кутепов Максим Владимирович', email='xydensgm@gmail.com', location='Екатеринбург', position='Golang разработчик', position_level='Unknown', experience_years=7, technical_skills=['Golang', 'JavaScript', 'Node.js', 'Docker', 'Go', 'React', 'TypeScript', 'NestJS', 'RabbitMQ', 'Gitlab', 'Backend', 'PostgreSQL', 'Linux', 'Clickhouse', 'VueJS', 'Kubernetes', 'Микросервисная архитектура', 'AWS', 'PHP', 'SQL', 'Apache Kafka', 'Frontend', 'Grafana', 'Agile', 'Scrum', 'Blockchain', 'NoSQL', 'Helm', 'Terraform'], experience=['ITEZ: Node.js разработчик - разработка b2c и b2b обменника криптовалюты и кастодиального кошелька, интеграция с платежными системами и блокчейнами, создание системы анализа транзакций и балансов кошельков.', "ООО 'ОктаКод'/ ООО 'ЦЕНТР РАЗРАБОТКИ СИЭЕКСАЙО РУС': Backend developer / Tech Lead - проектирование микросервисной архитектуры криптовалютного маржинального брокера, управление счетами клиентов и anti-fraud сервисами, внедрение монорепозитория и CI проц

In [70]:
# Score the extracted CV against the extracted job requirements; both inputs
# come from earlier cells.
comparison = compare_cv_with_job(cv_features, job_requirements)
print(comparison)


position_match=8 technical_skills_match=8 experience_years_match=True experience_match=8 achievements_match=9 education_match=5 language_skills_match=0


In [71]:
def total_match_score(comparison: "ComparisonResult", weights: "dict[str, float] | None" = None):
    """Collapse the per-category match scores into one 0-100 percentage.

    Each category value is multiplied by its weight and summed. The sum is then
    divided by the largest weighted sum possible and rounded to a whole
    percentage.

    Args:
        comparison: Object exposing the seven ``*_match`` attributes of
            ``ComparisonResult``.
        weights: Optional per-category overrides, keyed by attribute name.
            Categories not mentioned keep their default weight. Keys that are
            not a known category are ignored.

    Returns:
        The weighted match as a rounded percentage, or 0 when every effective
        weight is zero.

    NOTE(review): ``experience_years_match`` is a boolean and tops out at 1
    while every other category tops out at 10. With the default weight of 3 it
    can therefore add at most 3 points to the total, versus 30 for a 0-10
    category of the same weight. This is preserved from the original
    implementation — confirm it is intended.
    """
    default_weights = {
        'position_match': 4,
        'experience_years_match': 3,
        'technical_skills_match': 2,
        'experience_match': 3,
        'achievements_match': 3,
        'education_match': 1,
        'language_skills_match': 1,
    }
    # Largest value each category can legitimately take. Driving both sums from
    # this one table keeps the achieved and the maximum score in step.
    category_caps = {
        'position_match': 10,
        'technical_skills_match': 10,
        'experience_years_match': 1,
        'experience_match': 10,
        'achievements_match': 10,
        'education_match': 10,
        'language_skills_match': 10,
    }
    effective_weights = {**default_weights, **(weights or {})}

    total_score = 0
    max_score = 0
    for category, cap in category_caps.items():
        weight = effective_weights[category]
        # The scores come from a model and are not range-validated, so clamp
        # them to keep the result within 0-100. int() also maps the boolean
        # category to 0/1.
        value = min(max(int(getattr(comparison, category)), 0), cap)
        total_score += value * weight
        max_score += cap * weight

    # Only reachable with caller-supplied weights that are all zero.
    if max_score == 0:
        return 0

    return round(total_score / max_score * 100)

# Overall match percentage for the comparison computed in an earlier cell.
total_match_score(comparison)

75