In [33]:
import json
import copy
import pandas as pd
import importlib
from datetime import datetime
from tqdm import tqdm

from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file into the process environment
# (python-dotenv). Presumably this supplies the credentials needed by the
# chains imported below -- TODO confirm which variables are required.
load_dotenv()

True

In [3]:
from chains.skill_grader import skill_grader as skill_grader_chain


In [4]:
# read pdf content
# Loads the resume table from CSV and previews it: the shape is printed and the
# first two rows are rendered. Presumably the CSV was produced by an earlier
# PDF text-extraction step -- confirm where it comes from.
pdf_files_contents = pd.read_csv('./data/pdf_files_contents.csv')
print(pdf_files_contents.shape)
display(pdf_files_contents.head(2))

(37, 3)


Unnamed: 0,pdf_path,pdf_name,pdf_content
0,./../../../../data/sample_resume/Abdallah Elra...,Abdallah Elraey,\n\npage_number: 0\nAbdallah Mohamed Jumaa ML ...
1,./../../../../data/sample_resume/Hadeel Mabrou...,Hadeel Mabrouk,\n\npage_number: 0\nHadeel Emad Mabrouk\n\n+20...


In [5]:
# Work on an independent deep copy so the columns added to the analysis frame
# do not modify the originally loaded pdf_files_contents frame.
applicants_resume_analysis_df = pdf_files_contents.copy(deep=True)

In [6]:
# read desired skill list
# The file holds one skill per line. Each line is stripped and blank lines are
# dropped, so a trailing newline (or a stray empty / whitespace-only line, or
# Windows "\r" line endings) can never produce an empty or padded skill name
# that would later be graded and added as a DataFrame column.
with open('./desired_skills.txt', 'r') as fh:
    desired_skills_list = [line.strip() for line in fh if line.strip()]

desired_skills_list

['object detection',
 'semantic segmentation',
 'image classification',
 'computer vision',
 'transformers',
 'vision transformers']

In [7]:
# add desired skill list as columns to applicants_resume_analysis_df
# Every skill column starts as False for all rows; the grading loop further
# down overwrites these flags row by row.
for skill in desired_skills_list:
    applicants_resume_analysis_df[skill] = False

display(applicants_resume_analysis_df.head(2))

Unnamed: 0,pdf_path,pdf_name,pdf_content,object detection,semantic segmentation,image classification,computer vision,transformers,vision transformers
0,./../../../../data/sample_resume/Abdallah Elra...,Abdallah Elraey,\n\npage_number: 0\nAbdallah Mohamed Jumaa ML ...,False,False,False,False,False,False
1,./../../../../data/sample_resume/Hadeel Mabrou...,Hadeel Mabrouk,\n\npage_number: 0\nHadeel Emad Mabrouk\n\n+20...,False,False,False,False,False,False


## 1. Get desired_skill_score_dict for a list of desired skills

In [8]:
# desired_skill = desired_skills_list[-1]
# desired_skill

In [9]:
# pdf_content = pdf_files_contents.at[0, 'pdf_content']
# pdf_name = pdf_files_contents.at[0, 'pdf_name']
# print(f'pdf_name: {pdf_name}')
# # print(pdf_content)

In [10]:
def skill_grader(desired_skill, resume_content):
    """Run the skill-grader chain for one skill against one resume.

    Args:
        desired_skill: Skill name, passed to the chain as "desired_skill".
        resume_content: Resume text, passed to the chain as "resume_content".

    Returns:
        The object returned by ``skill_grader_chain.invoke``, unchanged.
        Callers in this notebook read its ``binary_score`` attribute.
    """
    chain_inputs = {
        "desired_skill": desired_skill,
        "resume_content": resume_content,
    }
    return skill_grader_chain.invoke(chain_inputs)

In [11]:
def get_desired_skill_score_dict(resume_content):
    """Grade one resume against every entry of ``desired_skills_list``.

    Args:
        resume_content: Resume text forwarded to ``skill_grader``.

    Returns:
        dict: Maps each desired skill to the ``binary_score`` attribute of the
        result ``skill_grader`` returned for that skill. ``skill_grader`` is
        called once per skill, in list order.
    """
    return {
        skill_name: skill_grader(skill_name, resume_content).binary_score
        for skill_name in desired_skills_list
    }



In [12]:
# desired_skill_score_dict = get_desired_skill_score_dict(pdf_content)

In [13]:
# desired_skill_score_dict

In [14]:
# Grade every resume against every desired skill and store each result in the
# matching skill column of the same row.
# `.at[idx, ...]` is label-based, so iterating range(n_rows) relies on the
# frame having the default 0..n-1 index.
# Note: get_desired_skill_score_dict calls the grader once per skill, so this
# loop issues one grader call per (resume, skill) pair.
for idx in range(applicants_resume_analysis_df.shape[0]):
    pdf_content = applicants_resume_analysis_df.at[idx, 'pdf_content']
    desired_skill_score_dict = get_desired_skill_score_dict(pdf_content)
    for desired_skill in desired_skills_list:
        applicants_resume_analysis_df.at[idx, desired_skill] = desired_skill_score_dict[desired_skill]

In [15]:
# Preview the first ten rows to inspect the skill columns after grading.
display(applicants_resume_analysis_df.head(10))

Unnamed: 0,pdf_path,pdf_name,pdf_content,object detection,semantic segmentation,image classification,computer vision,transformers,vision transformers
0,./../../../../data/sample_resume/Abdallah Elra...,Abdallah Elraey,\n\npage_number: 0\nAbdallah Mohamed Jumaa ML ...,False,False,True,True,True,False
1,./../../../../data/sample_resume/Hadeel Mabrou...,Hadeel Mabrouk,\n\npage_number: 0\nHadeel Emad Mabrouk\n\n+20...,True,False,False,True,False,False
2,./../../../../data/sample_resume/Ziad Abdeltaw...,Ziad Abdeltawab,\n\npage_number: 0\nZiad Abdeltawab\n\nziad.ta...,False,True,False,True,False,False
3,./../../../../data/sample_resume/Adham Tawfik.pdf,Adham Tawfik,\n\npage_number: 0\nAdam Mohamed Tawfik\n\nAdd...,True,False,True,True,False,False
4,./../../../../data/sample_resume/Ahmed Mostafa...,Ahmed Mostafa Attia,"\n\npage_number: 0\nAhmed Mostafa Attia\n\nAI,...",True,False,True,True,False,False
5,./../../../../data/sample_resume/Ali Nabil.pdf,Ali Nabil,\n\npage_number: 0\nAI Engineer Fulltime\n\n11...,True,False,False,True,False,False
6,./../../../../data/sample_resume/Nouran Ali.pdf,Nouran Ali,\n\npage_number: 0\nNouran Ali\n\nSenior Compu...,True,False,True,True,False,False
7,./../../../../data/sample_resume/Mohamed Abdel...,Mohamed Abdelaziz,\n\npage_number: 0\nMohamed Abdelaziz\n\nMilit...,True,True,False,True,True,False
8,./../../../../data/sample_resume/Abdalrahman K...,Abdalrahman Kamel,\n\npage_number: 0\nEducation\n\n1. Master deg...,True,False,True,True,True,False
9,./../../../../data/sample_resume/Lamees Elbakr...,Lamees Elbakr,\n\npage_number: 0\nResearch and Development T...,True,False,True,True,True,False


In [16]:
# Persist the coarse grading results. index=False keeps the positional index
# out of the file, matching the later saves of this same CSV, so re-reading it
# with pd.read_csv does not introduce a spurious "Unnamed: 0" column.
applicants_resume_analysis_df.to_csv('./output/applicants_resume_analysis_df.csv', index=False)

# Fine-grained skill grader

In [12]:
from chains.fine_grained_skill_grader import fine_grained_skill_grader as fine_grained_skill_grader_chain

In [23]:
def get_present_date():
    """Return the current month and year as text, prefixed with one space.

    Returns:
        str: A string of the form " <full month name> <year>", built from
        ``datetime.now()``. The leading space is part of the returned value.
    """
    today = datetime.now()
    return f" {today:%B} {today.year}"

# Example usage
print(get_present_date())

 June 2025


In [24]:
def fine_grained_skill_grader(desired_skill, resume_content):
    """Run the fine-grained grader chain for a single desired skill.

    Args:
        desired_skill: Skill name, passed to the chain as 'desired_skill'.
        resume_content: Resume text, passed to the chain as 'resume_content'.

    Returns:
        The result of calling ``model_dump()`` on the chain's output. The chain
        also receives 'current_date', taken from ``get_present_date()``.
    """
    chain_inputs = {
        'desired_skill': desired_skill,
        'resume_content': resume_content,
        'current_date': get_present_date(),
    }
    grade = fine_grained_skill_grader_chain.invoke(chain_inputs)
    return grade.model_dump()

In [25]:
def process_desired_skills_summary_dict(desired_skills_summary_dict_for_one_candidate):
    """
    Add key_experiences_count and months_of_experience_total to every skill entry.

    The input dictionary is never modified; a deep copy is annotated and returned.

    Args:
        desired_skills_summary_dict_for_one_candidate (dict): Maps a skill name to a
            dict of details for that skill. ``key_experiences`` (a sized
            collection) and ``months_of_experience`` (an iterable of numbers, or
            a single number) are read when present.

    Returns:
        dict: Deep copy of the input in which every skill entry additionally has
            ``key_experiences_count`` (length of ``key_experiences``; 0 when the
            key is missing or its value is None) and
            ``months_of_experience_total`` (sum of the numeric entries of
            ``months_of_experience``; 0 when the key is missing or None).
            Non-numeric entries (e.g. None or a placeholder string) are skipped
            instead of raising.
    """
    # Create a deep copy to avoid modifying the original dictionary
    processed_dict = copy.deepcopy(desired_skills_summary_dict_for_one_candidate)

    for skill_data in processed_dict.values():
        # `or ()` covers both a missing key and an explicit None value, which a
        # plain `"key" in skill_data` check would let through to len(None).
        key_experiences = skill_data.get("key_experiences") or ()
        skill_data["key_experiences_count"] = len(key_experiences)

        months = skill_data.get("months_of_experience")
        if months is None:
            months = ()
        elif isinstance(months, (int, float)):
            # Tolerate a bare number where a list of numbers is expected.
            months = (months,)

        # Only numeric entries contribute to the total; anything else is ignored
        # so one malformed value does not abort the whole run.
        skill_data["months_of_experience_total"] = sum(
            value for value in months if isinstance(value, (int, float))
        )

    return processed_dict

In [26]:
# temp
# Replaces the in-memory frame with the contents of the CSV path that other
# cells in this notebook write via to_csv.
# NOTE(review): presumably a shortcut to resume from saved results without
# re-running the grading loop -- confirm, since it makes the following cells
# depend on a previously written file.
applicants_resume_analysis_df = pd.read_csv('./output/applicants_resume_analysis_df.csv')
display(applicants_resume_analysis_df.head(2))

Unnamed: 0,pdf_path,pdf_name,pdf_content,object detection,semantic segmentation,image classification,computer vision,transformers,vision transformers,summary
0,./../../../../data/sample_resume/Abdallah Elra...,Abdallah Elraey,\n\npage_number: 0\nAbdallah Mohamed Jumaa ML ...,False,False,True,True,True,False,Abdallah Mohamed Jumaa has demonstrated advanc...
1,./../../../../data/sample_resume/Hadeel Mabrou...,Hadeel Mabrouk,\n\npage_number: 0\nHadeel Emad Mabrouk\n\n+20...,True,False,False,True,False,False,Hadeel Emad Mabrouk has demonstrated a solid f...


In [37]:
# Collect fine-grained grades for every candidate (keyed by pdf_name, so rows
# sharing a pdf_name overwrite each other) and reset the 'summary' column to
# empty strings before filling it row by row.
fine_grained_skill_grade_dict = dict()
applicants_resume_analysis_df['summary'] = ''

for idx in tqdm(range(applicants_resume_analysis_df.shape[0])):


    fine_grained_skill_grade_dict_for_one_candidate = dict()
    pdf_content = applicants_resume_analysis_df.at[idx, 'pdf_content']
    candidate_name = applicants_resume_analysis_df.at[idx, 'pdf_name']
    # print(f'candidate_name: {candidate_name}')

    # Only skills whose column is truthy for this row get the fine-grained pass.
    for desired_skill in desired_skills_list:
        if applicants_resume_analysis_df.at[idx, desired_skill]:
            fine_grained_skill_grade_dict_for_one_candidate[desired_skill] = fine_grained_skill_grader(desired_skill, pdf_content)

    # process dictionary to add key_experiences_count and months_of_experience_total
    fine_grained_skill_grade_dict_for_one_candidate = process_desired_skills_summary_dict(fine_grained_skill_grade_dict_for_one_candidate)

    # join summaries for different skills
    # (this runs before 'candidate_summary' is added below, so every value
    # iterated here is a per-skill details dict)
    candidate_summary = ' '.join( [details['summary'] for details in fine_grained_skill_grade_dict_for_one_candidate.values()])
    fine_grained_skill_grade_dict_for_one_candidate['candidate_summary'] = candidate_summary

    fine_grained_skill_grade_dict[candidate_name] = fine_grained_skill_grade_dict_for_one_candidate

    # Empty string when none of the desired skills was flagged for this row.
    applicants_resume_analysis_df.at[idx, 'summary'] = candidate_summary


100%|██████████| 37/37 [12:44<00:00, 20.67s/it]


In [39]:
# Save the analysis frame without its index, then dump the nested
# per-candidate fine-grained grade dictionary as indented JSON.
applicants_resume_analysis_df.to_csv('./output/applicants_resume_analysis_df.csv', index=False)


with open('./output/fine_grained_skill_grade_dict.json', 'w') as fh:
    json.dump(fine_grained_skill_grade_dict, fh, indent=4)

# Write a summary for each desired skill

In [17]:
from chains.skill_summary_writer import skill_summary_writer as skill_summary_writer_chain

In [18]:
def skill_summary_writer(desired_skill, resume_content):
    """Run the skill-summary chain for a single desired skill.

    Args:
        desired_skill: Skill name, passed to the chain as 'desired_skill'.
        resume_content: Resume text, passed to the chain as 'resume_content'.

    Returns:
        The result of calling ``model_dump()`` on the chain's output.
    """
    chain_inputs = {
        'desired_skill': desired_skill,
        'resume_content': resume_content,
    }
    written_summary = skill_summary_writer_chain.invoke(chain_inputs)
    return written_summary.model_dump()


In [19]:
# Build desired_skills_summary_dict as {pdf_name -> {skill -> summary details}}
# and fill the 'summary' column with each candidate's joined skill summaries.
# NOTE(review): this resets and rewrites the same 'summary' column that the
# fine-grained section also fills -- confirm which of the two is meant to win.
desired_skills_summary_dict = dict()
applicants_resume_analysis_df['summary'] = ''

for idx in range(applicants_resume_analysis_df.shape[0]):

    desired_skills_summary_dict_for_one_candidate = dict()

    pdf_content = applicants_resume_analysis_df.at[idx, 'pdf_content']
    candidate_name = applicants_resume_analysis_df.at[idx, 'pdf_name']

    # Only skills whose column is truthy for this row are summarised.
    for desired_skill in desired_skills_list:
        if applicants_resume_analysis_df.at[idx, desired_skill]:
            desired_skills_summary_dict_for_one_candidate[desired_skill] = skill_summary_writer(desired_skill, pdf_content)

    desired_skills_summary_dict[candidate_name] = desired_skills_summary_dict_for_one_candidate

    # Space-joined per-skill summaries; empty string when no skill was flagged.
    candidate_summary = ' '.join( [details['summary'] for details in desired_skills_summary_dict_for_one_candidate.values()])
    applicants_resume_analysis_df.at[idx, 'summary'] = candidate_summary


In [None]:
# Save the analysis frame without its index (same path as the earlier saves,
# so the file is overwritten), then dump the per-candidate skill summaries as
# indented JSON.
applicants_resume_analysis_df.to_csv('./output/applicants_resume_analysis_df.csv', index=False)


with open('./output/desired_skills_summary_dict.json', 'w') as fh:
    json.dump(desired_skills_summary_dict, fh, indent=4)


In [14]:
# Display the whole summaries dictionary.
# NOTE(review): the saved output of this cell is keyed by skill name, while the
# loop that builds desired_skills_summary_dict keys it by candidate name -- the
# output looks stale; re-run to confirm.
desired_skills_summary_dict

{'image classification': {'skill_name': 'image classification',
  'proficiency_level': 'Advanced',
  'summary': 'Abdallah Mohamed Jumaa has demonstrated advanced proficiency in image classification through the development and fine-tuning of a MobileNet CNN for recognizing sign language digits. This project involved leveraging transfer learning to adapt a pre-trained model for specific image classification tasks, showcasing his deep learning expertise. His experience in model optimization for mobile applications further highlights his technical capabilities in this domain.',
  'key_experiences': ['Developed and fine-tuned a MobileNet CNN to recognize sign language digits, demonstrating expertise in image classification and deep learning.',
   'Leveraged transfer learning to adapt pre-trained MobileNet architecture for specific patterns in sign language digits, showcasing advanced model adaptation skills.'],
  'years_of_experience': 'NOT GIVEN',
  'confidence_score': 0.85},
 'computer vi

## Create a candidate summary from the summaries of the available skills

In [19]:
# Build and print the summary text for one candidate by joining that
# candidate's per-skill summaries.
# desired_skills_summary_dict, as built by the loop earlier in this notebook,
# maps candidate name -> {skill -> details}; the per-skill 'summary' strings
# therefore live one level down, and indexing the top-level values with
# ['summary'] raises KeyError. The older flat shape ({skill -> details}) is
# still accepted so the cell works against either.
first_entry = next(iter(desired_skills_summary_dict.values()), {})
if 'summary' in first_entry:
    # Flat shape: the dict itself is a single candidate's {skill -> details}.
    skill_details_for_candidate = desired_skills_summary_dict
else:
    # Nested shape: use the first candidate as the example.
    skill_details_for_candidate = first_entry

candidate_summary = ' '.join(
    details['summary'] for details in skill_details_for_candidate.values()
)
print(candidate_summary)

Abdallah Mohamed Jumaa has demonstrated advanced proficiency in image classification through the development and fine-tuning of a MobileNet CNN for recognizing sign language digits. This project involved leveraging transfer learning to adapt a pre-trained model for specific image classification tasks, showcasing his deep learning expertise. His experience in model optimization for mobile applications further highlights his technical capabilities in this domain. Abdallah Mohamed Jumaa has demonstrated a solid foundation in computer vision through the development of a sign language digit recognizer using a fine-tuned MobileNet CNN. This project involved leveraging transfer learning to adapt a pre-trained model for image classification tasks, showcasing his ability to apply deep learning techniques to real-world problems. While his experience in computer vision is not as extensive as in NLP, his work on this project indicates a practical understanding of model optimization for mobile appl

README.md	    graph.ipynb		   nodes
chains		    graph.py		   output
data		    lang_requirements.txt  strong_applicant_characteristics.txt
desired_skills.txt  models
