In [1]:
import warnings
warnings.filterwarnings('ignore')

In [10]:
# Import library
import re #regx library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pypdf import PdfReader # Read pdf
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
import pickle
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only load artifacts that were produced by this project.

# Load the skill vocabulary (iterated term by term in extract_skills)
with open("./list/skills.pkl", "rb") as f:
    skills = pickle.load(f)
# Load the education vocabulary (indexed by "degrees", "fields_of_study", "institutions")
with open("./list/education.pkl", "rb") as f:
    education = pickle.load(f)
# Load the experience vocabulary (iterated term by term in extract_experience)
with open("./list/experience.pkl", "rb") as f:
    experience = pickle.load(f)

# Load the trained classifier and its vectorizer.
# Context managers are used so the file handles are closed deterministically.
with open('./list/classfier.pkl', 'rb') as f:
    classfier_predict = pickle.load(f)
with open('./list/tdif.pkl', 'rb') as f:
    tdiff_vector = pickle.load(f)

In [4]:
# Job-category labels. Predict_Resume indexes this list with the classifier's
# predicted id, so the order of the entries must not change.
classes = [
    'Advocate',
    'Arts',
    'Automation Testing',
    'Blockchain',
    'Business Analyst',
    'Civil Engineer',
    'Data Science',
    'Database',
    'DevOps Engineer',
    'DotNet Developer',
    'ETL Developer',
    'Electrical Engineering',
    'HR',
    'Hadoop',
    'Health and fitness',
    'Java Developer',
    'Mechanical Engineer',
    'Network Security Engineer',
    'Operations Manager',
    'PMO',
    'Python Developer',
    'SAP Developer',
    'Sales',
    'Testing',
    'Web Designing',
]

In [5]:
# Read pdf 
def pdf_read(url):
    """Extract the text of every page of a PDF.

    Parameters
    ----------
    url : path or file-like object
        Passed straight to ``PdfReader``.

    Returns
    -------
    str
        The page texts joined with newlines. For a one-page PDF this is
        exactly that page's text.
    """
    reader = PdfReader(url)
    # Walk all pages: reading only pages[0] would silently drop everything
    # after the first page of a multi-page resume.
    page_texts = [page.extract_text() or '' for page in reader.pages]
    return '\n'.join(page_texts)

In [7]:
# Extract Skill
def extract_skills(text, skill_list=None):
    """Return the vocabulary terms that occur in ``text``.

    Matching is case-insensitive and whole-term: a term does not match when
    it is directly preceded or followed by a word character.

    Parameters
    ----------
    text : str
        Text to scan.
    skill_list : iterable of str, optional
        Terms to look for. Defaults to the module-level ``skills`` collection.

    Returns
    -------
    list
        The matching terms, in vocabulary order.
    """
    candidates = skills if skill_list is None else skill_list
    found = []
    for skill in candidates:
        # re.escape: terms are literals, so regex metacharacters in a term
        # ('+', '.', '(' ...) must not be interpreted as pattern syntax.
        # Lookarounds instead of \b: \b needs a word character on one side,
        # so it cannot anchor a term that starts or ends with punctuation.
        pattern = rf'(?<!\w){re.escape(str(skill))}(?!\w)'
        if re.search(pattern, text, re.IGNORECASE):
            found.append(skill)
    return found

#  Education Extraction Function
def extract_education(text, education_data=None):
    """Return the degrees, fields of study and institutions found in ``text``.

    Matching is case-insensitive and whole-term: a term does not match when
    it is directly preceded or followed by a word character.

    Parameters
    ----------
    text : str
        Text to scan.
    education_data : mapping, optional
        Mapping with the keys "degrees", "fields_of_study" and "institutions",
        each holding an iterable of terms. Defaults to the module-level
        ``education`` object.

    Returns
    -------
    dict
        ``{"Degrees": [...], "Study": [...], "Institutions": [...]}`` with the
        matching terms of each category, in vocabulary order.
    """
    source = education if education_data is None else education_data

    def _mentioned(terms):
        # Terms are escaped so they are matched literally ("B.Sc" must not
        # match "BxSc"); lookarounds replace \b so that terms starting or
        # ending with punctuation can still be anchored.
        return [
            term for term in terms
            if re.search(rf'(?<!\w){re.escape(str(term))}(?!\w)', text, re.IGNORECASE)
        ]

    return {
        "Degrees": _mentioned(source["degrees"]),
        "Study": _mentioned(source["fields_of_study"]),
        "Institutions": _mentioned(source["institutions"]),
    }

# Extract experience
def extract_experience(text, experience_list=None):
    """Return the experience terms that occur in ``text``.

    Matching is case-insensitive and whole-term: a term does not match when
    it is directly preceded or followed by a word character.

    Parameters
    ----------
    text : str
        Text to scan.
    experience_list : iterable of str, optional
        Terms to look for. Defaults to the module-level ``experience``
        collection.

    Returns
    -------
    list
        The matching terms, in vocabulary order.
    """
    candidates = experience if experience_list is None else experience_list
    found = []
    for exp in candidates:
        # Escape the term so regex metacharacters in it are matched literally;
        # lookarounds replace \b so punctuation-edged terms can be anchored.
        pattern = rf'(?<!\w){re.escape(str(exp))}(?!\w)'
        if re.search(pattern, text, re.IGNORECASE):
            found.append(exp)
    return found

In [8]:
def extract_Resume(resume):
    """Run the three extractors over ``resume`` and bundle their results.

    Returns a plain dict with the keys 'skill', 'education' and 'experience'
    holding the outputs of extract_skills, extract_education and
    extract_experience respectively.
    """
    return {
        'skill': extract_skills(resume),
        'education': extract_education(resume),
        'experience': extract_experience(resume),
    }

In [11]:
def cleanResume(txt):
    """Normalise raw resume / job text before extraction and classification.

    Removes URLs, the standalone tokens "RT" and "cc", hashtags and
    @-mentions; replaces ASCII punctuation and non-ASCII characters with
    spaces; collapses every run of whitespace into a single space.

    Parameters
    ----------
    txt : str
        Raw text.

    Returns
    -------
    str
        The cleaned text (leading/trailing single spaces are not stripped).
    """
    # Raw strings throughout: '\S' and '\s' in a plain literal are invalid
    # escape sequences.
    # No trailing \s is required, so a URL at the very end of the text is
    # removed as well.
    cleanText = re.sub(r'http\S+', ' ', txt)
    # Whole tokens only: an unanchored 'RT|cc' also cuts ordinary words apart
    # ("success" -> "su ess", "EXPERT" -> "EXPE ").
    # NOTE(review): if the pickled vectorizer/classifier was fitted on text
    # cleaned with the unanchored rule, confirm predictions or re-fit.
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    cleanText = re.sub(r'@\S+', '  ', cleanText)
    cleanText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText

In [13]:
def get_ResumeSimilarity(resume,job):
    """Cosine similarity, multiplied by 100, between the TF-IDF vectors of two texts.

    A fresh TfidfVectorizer is fitted on just the two inputs, so the
    vocabulary consists only of what these two texts contain.
    """
    tfidf_matrix = TfidfVectorizer().fit_transform([resume, job])
    resume_row, job_row = tfidf_matrix[0:1], tfidf_matrix[1:2]
    similarity = cosine_similarity(resume_row, job_row)
    return similarity.item() * 100

In [14]:
def Predict_Resume(resume):
    """Predict the job category for ``resume``.

    The text is vectorised with the loaded ``tdiff_vector``, classified with
    ``classfier_predict``, and the first predicted id is looked up in
    ``classes``.
    """
    features = tdiff_vector.transform([resume])
    predicted_ids = classfier_predict.predict(features)
    return classes[predicted_ids[0]]

In [15]:
def matching_score(resume_, job_):
    """Score how many of the job's items also appear in the resume.

    Parameters
    ----------
    resume_ : iterable of hashable
        Items extracted from the resume.
    job_ : iterable of hashable
        Items extracted from the job description.

    Returns
    -------
    tuple
        ``(match_score, intersection)`` where ``match_score`` is the
        percentage of distinct job items that are also resume items and
        ``intersection`` is the set of shared items. When the job side is
        empty the score is ``0.0``.
    """
    resume_set = set(resume_)
    job_set = set(job_)

    intersection = resume_set & job_set
    if not job_set:
        # Nothing was extracted from the job text: report 0% instead of
        # raising ZeroDivisionError.
        return 0.0, intersection

    # The denominator is the job side only, so extra resume items do not
    # lower the score.
    match_score = (len(intersection) / len(job_set)) * 100
    return match_score, intersection

In [16]:
def predict(resume,job):
    """Print a match report for a resume against a job description.

    Both texts are cleaned with cleanResume; the cleaned resume is classified
    with Predict_Resume; skills, education and experience are extracted from
    both texts with extract_Resume and compared with matching_score; finally
    get_ResumeSimilarity is computed between the string forms of the two
    extraction results.

    Parameters
    ----------
    resume : str
        Raw resume text.
    job : str
        Raw job-description text.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    clean_resume = cleanResume(resume)
    clean_job = cleanResume(job)

    # Predict the resume's category
    resule_predict = Predict_Resume(clean_resume)
    # Extract structured items from both texts
    resume_extract = extract_Resume(clean_resume)
    job_extract = extract_Resume(clean_job)

    # Compare the extracted items category by category
    score_skill, matched_skills = matching_score(resume_extract['skill'], job_extract['skill'])
    # Compare the fields of study. Passing the whole education dicts would
    # compare their three keys, which always yields 100%.
    score_education, matched_education = matching_score(
        resume_extract['education']['Study'],
        job_extract['education']['Study'],
    )
    score_experience, matched_experience = matching_score(resume_extract['experience'], job_extract['experience'])

    # Cosine similarity between the two extraction results
    similiraty_skill = get_ResumeSimilarity(str(resume_extract), str(job_extract))

    # Print the report
    print(f'Your clean Resume: {clean_resume}\n')
    # Show the cleaned job text, matching the label (not the raw input)
    print(f'Your clean Job: {clean_job}\n\n\n')

    print(f'Your Resume Related to: {resule_predict}\n\n\n')

    print(f'Your Resume Skill: {resume_extract}\n')
    print(f'Your Job Skill: {job_extract}\n\n\n')

    # The matched items are sets; sort them so the printed order is stable
    print(f"Job Skill Match Score: {score_skill:.2f}%")
    print(f"Matched Skills: {', '.join(sorted(matched_skills))}\n\n")

    print(f"Job Education Match Score: {score_education:.2f}%")
    print(f"Matched education: {', '.join(sorted(matched_education))}\n\n")

    print(f"Job Experience Match Score: {score_experience:.2f}%")
    print(f"Matched experience: {', '.join(sorted(matched_experience))}\n\n")

    print(f"Job Resume match Score: {similiraty_skill:.2f}%")

In [18]:
with open('./job.txt', 'r', encoding='utf-8') as file:
    # Read the description as one string. With readlines() the later
    # str(job) calls pass the list's repr (brackets, quotes and literal
    # "\n" escapes) into the cleaning step instead of the actual text.
    job = file.read()

In [20]:
# Extract the resume text, then print the report for it against the job description
res = pdf_read('./resume.pdf')
predict(resume=res, job=str(job))

Your clean Resume: Ali Hassan 26 Awami Colony Ghazi Road Lahore 92 03048630925 github com alihassanml kaggle com alihassanml alihassanml vercel app alihassanbscs99 EDUCATI ON COLLE GE UNIVER SITY Intermediate in Computer Science Defense Degree College Getting A Grade 84 1st Year 79 2nd Year 2020 22 Bachelor of Science in Computer Science Lahore Garrison University 6th Semester In Progress 2022 26 PRO FE SSION AL E XP ERIE NCE Professional Summary A results driven Data Scientist Machine Learning Engineer and Full Stack Developer with expertise in designing developing and deploying AI powered solutions With a strong foundation in mathematics statistics I specialize in machine learning deep learning and generative AI leveraging technologies like LangChain LSTMs Transformer and YOLO based object detection My experience extends to MLOps and cloud deployment including AWS CI CD pipelines and Docker ensuring scalable and efficient model operations Additionally my full stack development skills

In [33]:
# Clean both texts
clean_resume = cleanResume(str(res))
clean_job = cleanResume(str(job))

# Extract skills / education / experience from each cleaned text
resume_extract = extract_Resume(clean_resume)
job_extract = extract_Resume(clean_job)

# Score each category against the job's items (education uses the fields of study)
score_skill, matched_skills = matching_score(
    resume_extract['skill'],
    job_extract['skill'],
)
score_education, matched_education = matching_score(
    resume_extract['education']['Study'],
    job_extract['education']['Study'],
)
score_experience, matched_experience = matching_score(
    resume_extract['experience'],
    job_extract['experience'],
)

In [30]:
# Fields of study found in the resume (first line) and in the job text (second line)
print(
    resume_extract['education']['Study'],
    job_extract['education']['Study'],
    sep='\n',
)

['Computer Science', 'Machine Learning', 'Web Development', 'Mathematics', 'Statistics']
['Computer Science', 'Data Science', 'Machine Learning', 'Mathematics', 'Statistics']


In [37]:
# Display the three category scores as a tuple (the opening parenthesis was missing)
(score_skill, score_education, score_experience)

(54.54545454545454, 80.0, 50.0)

In [43]:
# Overall rank: the three category scores averaged, kept on a 0-100 scale
total_score = score_skill + score_education + score_experience
rank = (total_score / 300) * 100
rank

61.51515151515151

In [54]:
import warnings
warnings.filterwarnings('ignore')

# Import library
import re #regx library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pypdf import PdfReader # Read pdf
import streamlit as st
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sentence_transformers import SentenceTransformer


import pickle
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only load artifacts that were produced by this project.

# Load the skill vocabulary (iterated term by term in extract_skills)
with open("./list/skills.pkl", "rb") as f:
    skills = pickle.load(f)
# Load the education vocabulary (indexed by "degrees", "fields_of_study", "institutions")
with open("./list/education.pkl", "rb") as f:
    education = pickle.load(f)
# Load the experience vocabulary (iterated term by term in extract_experience)
with open("./list/experience.pkl", "rb") as f:
    experience = pickle.load(f)

# Load the trained classifier and its vectorizer.
# Context managers are used so the file handles are closed deterministically.
with open('./list/classfier.pkl', 'rb') as f:
    classfier_predict = pickle.load(f)
with open('./list/tdif.pkl', 'rb') as f:
    tdiff_vector = pickle.load(f)

# Sentence-embedding model used by text_to_vector
model = SentenceTransformer("all-MiniLM-L6-v2")

def text_to_vector(text):
    """Encode ``text`` with the module-level ``model`` and return the result of ``model.encode``."""
    embedding = model.encode(text)
    return embedding




# Job-category labels. Predict_Resume indexes this list with the classifier's
# predicted id, so the order of the entries must not change.
classes = [
    'Advocate',
    'Arts',
    'Automation Testing',
    'Blockchain',
    'Business Analyst',
    'Civil Engineer',
    'Data Science',
    'Database',
    'DevOps Engineer',
    'DotNet Developer',
    'ETL Developer',
    'Electrical Engineering',
    'HR',
    'Hadoop',
    'Health and fitness',
    'Java Developer',
    'Mechanical Engineer',
    'Network Security Engineer',
    'Operations Manager',
    'PMO',
    'Python Developer',
    'SAP Developer',
    'Sales',
    'Testing',
    'Web Designing',
]


# Read pdf 
def pdf_read(url):
    """Extract the text of every page of a PDF.

    Parameters
    ----------
    url : path or file-like object
        Passed straight to ``PdfReader``.

    Returns
    -------
    str
        The page texts joined with newlines. For a one-page PDF this is
        exactly that page's text.
    """
    reader = PdfReader(url)
    # Walk all pages: reading only pages[0] would silently drop everything
    # after the first page of a multi-page resume.
    page_texts = [page.extract_text() or '' for page in reader.pages]
    return '\n'.join(page_texts)


# Extract Skill
def extract_skills(text, skill_list=None):
    """Return the vocabulary terms that occur in ``text``.

    Matching is case-insensitive and whole-term: a term does not match when
    it is directly preceded or followed by a word character.

    Parameters
    ----------
    text : str
        Text to scan.
    skill_list : iterable of str, optional
        Terms to look for. Defaults to the module-level ``skills`` collection.

    Returns
    -------
    list
        The matching terms, in vocabulary order.
    """
    candidates = skills if skill_list is None else skill_list
    found = []
    for skill in candidates:
        # re.escape: terms are literals, so regex metacharacters in a term
        # ('+', '.', '(' ...) must not be interpreted as pattern syntax.
        # Lookarounds instead of \b: \b needs a word character on one side,
        # so it cannot anchor a term that starts or ends with punctuation.
        pattern = rf'(?<!\w){re.escape(str(skill))}(?!\w)'
        if re.search(pattern, text, re.IGNORECASE):
            found.append(skill)
    return found

#  Education Extraction Function
def extract_education(text, education_data=None):
    """Return the degrees, fields of study and institutions found in ``text``.

    Matching is case-insensitive and whole-term: a term does not match when
    it is directly preceded or followed by a word character.

    Parameters
    ----------
    text : str
        Text to scan.
    education_data : mapping, optional
        Mapping with the keys "degrees", "fields_of_study" and "institutions",
        each holding an iterable of terms. Defaults to the module-level
        ``education`` object.

    Returns
    -------
    dict
        ``{"Degrees": [...], "Study": [...], "Institutions": [...]}`` with the
        matching terms of each category, in vocabulary order.
    """
    source = education if education_data is None else education_data

    def _mentioned(terms):
        # Terms are escaped so they are matched literally ("B.Sc" must not
        # match "BxSc"); lookarounds replace \b so that terms starting or
        # ending with punctuation can still be anchored.
        return [
            term for term in terms
            if re.search(rf'(?<!\w){re.escape(str(term))}(?!\w)', text, re.IGNORECASE)
        ]

    return {
        "Degrees": _mentioned(source["degrees"]),
        "Study": _mentioned(source["fields_of_study"]),
        "Institutions": _mentioned(source["institutions"]),
    }

# Extract experience
def extract_experience(text, experience_list=None):
    """Return the experience terms that occur in ``text``.

    Matching is case-insensitive and whole-term: a term does not match when
    it is directly preceded or followed by a word character.

    Parameters
    ----------
    text : str
        Text to scan.
    experience_list : iterable of str, optional
        Terms to look for. Defaults to the module-level ``experience``
        collection.

    Returns
    -------
    list
        The matching terms, in vocabulary order.
    """
    candidates = experience if experience_list is None else experience_list
    found = []
    for exp in candidates:
        # Escape the term so regex metacharacters in it are matched literally;
        # lookarounds replace \b so punctuation-edged terms can be anchored.
        pattern = rf'(?<!\w){re.escape(str(exp))}(?!\w)'
        if re.search(pattern, text, re.IGNORECASE):
            found.append(exp)
    return found


def extract_Resume(resume):
    """Run the three extractors over ``resume`` and bundle their results.

    Returns a plain dict with the keys 'skill', 'education' and 'experience'
    holding the outputs of extract_skills, extract_education and
    extract_experience respectively.
    """
    return {
        'skill': extract_skills(resume),
        'education': extract_education(resume),
        'experience': extract_experience(resume),
    }


def cleanResume(txt):
    """Normalise raw resume / job text before extraction and classification.

    Removes URLs, the standalone tokens "RT" and "cc", hashtags and
    @-mentions; replaces ASCII punctuation and non-ASCII characters with
    spaces; collapses every run of whitespace into a single space.

    Parameters
    ----------
    txt : str
        Raw text.

    Returns
    -------
    str
        The cleaned text (leading/trailing single spaces are not stripped).
    """
    # Raw strings throughout: '\S' and '\s' in a plain literal are invalid
    # escape sequences.
    # No trailing \s is required, so a URL at the very end of the text is
    # removed as well.
    cleanText = re.sub(r'http\S+', ' ', txt)
    # Whole tokens only: an unanchored 'RT|cc' also cuts ordinary words apart
    # ("success" -> "su ess", "EXPERT" -> "EXPE ").
    # NOTE(review): if the pickled vectorizer/classifier was fitted on text
    # cleaned with the unanchored rule, confirm predictions or re-fit.
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    cleanText = re.sub(r'@\S+', '  ', cleanText)
    cleanText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText


def get_ResumeSimilarity(resume,job):
    """Cosine similarity, multiplied by 100, between the embeddings of two texts.

    Each text is embedded with text_to_vector; the two vectors are compared
    with sklearn's cosine_similarity.
    """
    resume_embedding = text_to_vector(resume)
    job_embedding = text_to_vector(job)
    similarity_matrix = cosine_similarity([resume_embedding], [job_embedding])
    # The 1x1 result matrix holds the single pairwise similarity
    return similarity_matrix[0][0] * 100

def Predict_Resume(resume):
    """Predict the job category for ``resume``.

    The text is vectorised with the loaded ``tdiff_vector``, classified with
    ``classfier_predict``, and the first predicted id is looked up in
    ``classes``.
    """
    features = tdiff_vector.transform([resume])
    predicted_ids = classfier_predict.predict(features)
    return classes[predicted_ids[0]]

def matching_score(resume_, job_):
    """Score how many of the job's items also appear in the resume.

    Parameters
    ----------
    resume_ : iterable of hashable
        Items extracted from the resume.
    job_ : iterable of hashable
        Items extracted from the job description.

    Returns
    -------
    tuple
        ``(match_score, intersection)`` where ``match_score`` is the
        percentage of distinct job items that are also resume items and
        ``intersection`` is the set of shared items. When the job side is
        empty the score is ``0.0``.
    """
    resume_set = set(resume_)
    job_set = set(job_)

    intersection = resume_set & job_set
    if not job_set:
        # Nothing was extracted from the job text: report 0% instead of
        # raising ZeroDivisionError.
        return 0.0, intersection

    # The denominator is the job side only, so extra resume items do not
    # lower the score.
    match_score = (len(intersection) / len(job_set)) * 100
    return match_score, intersection



def predict(resume, job):
    """Build a match report for a resume against a job description.

    Both texts are cleaned with cleanResume; the cleaned resume is classified
    with Predict_Resume; skills, education and experience are extracted from
    both cleaned texts with extract_Resume and compared with matching_score.
    get_ResumeSimilarity is computed on the uncleaned inputs.

    Parameters
    ----------
    resume : str
        Raw resume text.
    job : str
        Raw job-description text.

    Returns
    -------
    dict
        Cleaned texts, predicted category, extraction results, per-category
        match scores (formatted as percentage strings), matched items (as
        sorted lists), the similarity score and the overall rank.
    """
    clean_resume = cleanResume(resume)
    clean_job = cleanResume(job)

    # Predict the resume's category
    resule_predict = Predict_Resume(clean_resume)
    # Extract structured items from both texts
    resume_extract = extract_Resume(clean_resume)
    job_extract = extract_Resume(clean_job)
    # Compare the extracted items category by category
    score_skill, matched_skills = matching_score(resume_extract['skill'], job_extract['skill'])
    score_education, matched_education = matching_score(resume_extract['education']['Study'],job_extract['education']['Study'])
    score_experience, matched_experience = matching_score(resume_extract['experience'], job_extract['experience'])

    # Similarity between the two raw texts
    similiraty_skill = get_ResumeSimilarity(str(resume), str(job))
    # NOTE(review): the rank averages skill and education only; the experience
    # score is reported below but not included here - confirm this is intended.
    rank = ((score_skill + score_education)/200)*100

    # Matched items are sets; convert them to sorted lists so the result is
    # JSON-serialisable and has a stable order.
    result = {
        "clean_resume": clean_resume,
        # Return the cleaned job text, as the key says (not the raw input)
        "clean_job": clean_job,
        "resume_related_to": resule_predict,
        "resume_skills": resume_extract,
        "job_skills": job_extract,
        "job_skill_match_score": f"{score_skill:.2f}%",
        "matched_skills": sorted(matched_skills),
        "job_education_match_score": f"{score_education:.2f}%",
        "matched_education": sorted(matched_education),
        "job_experience_match_score": f"{score_experience:.2f}%",
        "matched_experience": sorted(matched_experience),
        "job_resume_match_score": f"{similiraty_skill:.2f}%",
        "resume_rank": f"{rank:.2f}%"
    }

    return result




def resume_result(resume_text,job):
    """Convenience wrapper: run predict on ``resume_text`` with ``job`` coerced to str."""
    job_text = str(job)
    return predict(resume_text, job_text)

# Run the pipeline on the resume text and job description loaded earlier
result_my = resume_result(res, str(job))

In [60]:
# Skills that extract_skills found in the cleaned job description
result_my['job_skills']['skill']

['AWS',
 'SQL',
 'Hadoop',
 'PyTorch',
 'Big Data',
 'Statistical Analysis',
 'Azure',
 'Machine Learning',
 'TensorFlow',
 'Data Science',
 'Python']