In [24]:
import pandas as pd

df = pd.read_csv("./Resume/Resume.csv")

In [25]:
df.head()

Unnamed: 0,ID,Resume_str,Resume_html,Category
0,16852973,HR ADMINISTRATOR/MARKETING ASSOCIATE\...,"<div class=""fontsize fontface vmargins hmargin...",HR
1,22323967,"HR SPECIALIST, US HR OPERATIONS ...","<div class=""fontsize fontface vmargins hmargin...",HR
2,33176873,HR DIRECTOR Summary Over 2...,"<div class=""fontsize fontface vmargins hmargin...",HR
3,27018550,HR SPECIALIST Summary Dedica...,"<div class=""fontsize fontface vmargins hmargin...",HR
4,17812897,HR MANAGER Skill Highlights ...,"<div class=""fontsize fontface vmargins hmargin...",HR


In [26]:
import re

def clean_text(text):
    if not isinstance(text, str):
        return ""
        
    text = text.replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
    text = re.sub(r'\s+', ' ', text)
    text = text.strip()
    text = re.sub(r'[^a-zA-Z0-9,.!?@#%&()\-+=:;\'\" ]', '', text)
    
    return text

df['Cleaned_Resume'] = df['Resume_str'].apply(clean_text)

df.to_csv("./Resume/Resume_cleaned.csv", index=False)

print("Файл с очищенными резюме успешно сохранён как Resume_cleaned.csv")


Файл с очищенными резюме успешно сохранён как Resume_cleaned.csv


In [9]:
print(df.shape)
print(df['Category'].value_counts())
print(df.head(2)['Cleaned_Resume'].values)

(2484, 5)
Category
INFORMATION-TECHNOLOGY    120
BUSINESS-DEVELOPMENT      120
ADVOCATE                  118
CHEF                      118
ENGINEERING               118
ACCOUNTANT                118
FINANCE                   118
FITNESS                   117
AVIATION                  117
SALES                     116
BANKING                   115
HEALTHCARE                115
CONSULTANT                115
CONSTRUCTION              112
PUBLIC-RELATIONS          111
HR                        110
DESIGNER                  107
ARTS                      103
TEACHER                   102
APPAREL                    97
DIGITAL-MEDIA              96
AGRICULTURE                63
AUTOMOBILE                 36
BPO                        22
Name: count, dtype: int64
["HR ADMINISTRATORMARKETING ASSOCIATE HR ADMINISTRATOR Summary Dedicated Customer Service Manager with 15+ years of experience in Hospitality and Customer Service Management. Respected builder and leader of customer-focused teams; stri

In [27]:
import pandas as pd
from openai import OpenAI
import re
import os 

api_key = os.environ.get("OPENAI_API_KEY")

if not api_key:
    raise ValueError("OPENAI_API_KEY not found in environment variables.")

client = OpenAI(api_key=api_key)

def evaluate_resume(resume_text):
    prompt = f"""
You are an expert HR recruiter. Please evaluate the following resume text according to these criteria (score 1 to 5 for each):

Criteria:
1. Hard skills
2. Soft skills
3. Foreign language
4. Experience – Courses, certifications, past projects, work history
5. Personality traits

Provide the scores as numbers only. Then give an overall score (1-5).

Resume:
{resume_text}
"""
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        max_tokens=200
    )
    return response.choices[0].message.content
    
def parse_scores(llm_response):
    scores = {}

    rename_map = {
        '1._hard_skills': 'hard_skills',
        '2._soft_skills': 'soft_skills',
        '3._foreign_language': 'foreign_language',
        '4._experience_–_courses,_certifications,_past_projects,_work_history': 'experience',
        '5._personality_traits': 'personality_traits',
        'overall_score': 'overall_score'
    }

    for line in llm_response.split('\n'):
        match = re.match(r'^(.*):\s*([\d.]+)', line)
        if match:
            key = match.group(1).strip().lower().replace(' ', '_')
            value = float(match.group(2))
            key = rename_map.get(key, key)  
            scores[key] = value
    return scores



df = pd.read_csv("./Resume/Resume_cleaned.csv")

results = []

for i, row in df.iterrows():
    resume_text = row['Cleaned_Resume']
    print(f"Processing resume ID={row['ID']} ({i+1}/{len(df)})")
    
    llm_response = evaluate_resume(resume_text)
    print("LLM response:")
    print(llm_response)
    print("-" * 40)
    
    scores = parse_scores(llm_response)
    
    record = {
        'ID': row['ID'],
        'Cleaned_Resume': resume_text
    }
    record.update(scores)
    results.append(record)

df_scores = pd.DataFrame(results)

df_scores.to_csv("Resume_scores.csv", index=False)
print("Оценки с резюме и текстом сохранены")


Processing resume ID=16852973 (1/2484)
LLM response:
1. Hard skills: 4
2. Soft skills: 4
3. Foreign language: 1
4. Experience – Courses, certifications, past projects, work history: 5
5. Personality traits: 4

Overall score: 4
----------------------------------------
Processing resume ID=22323967 (2/2484)
LLM response:
1. Hard skills: 4
2. Soft skills: 4
3. Foreign language: 1
4. Experience – Courses, certifications, past projects, work history: 4
5. Personality traits: 3

Overall score: 4
----------------------------------------
Processing resume ID=33176873 (3/2484)
LLM response:
1. Hard skills: 5
2. Soft skills: 4
3. Foreign language: 1
4. Experience – Courses, certifications, past projects, work history: 5
5. Personality traits: 4

Overall score: 4
----------------------------------------
Processing resume ID=27018550 (4/2484)
LLM response:
1. Hard skills: 3
2. Soft skills: 4
3. Foreign language: 1
4. Experience – Courses, certifications, past projects, work history: 3
5. Personali

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o in organization org-WDzZg4LIHeNKsuCbXkrDccQf on tokens per min (TPM): Limit 30000, Used 28373, Requested 2197. Please try again in 1.14s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}}

In [2]:
import pandas as pd 
df_orig = pd.read_csv("./Resume/Resume.csv")            
df_scores = pd.read_csv("Resume_scores_backup.csv")   

df_orig_filtered = df_orig[df_orig['ID'].isin(df_scores['ID'])]


df_merged = df_scores.merge(df_orig_filtered[['ID', 'Category']], on='ID', how='left')

df_merged.to_csv("Resume_scores_with_category.csv", index=False)
