In [21]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation/runtime warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [22]:
# Import library
import re #regx library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pypdf import PdfReader # Read pdf


%matplotlib inline

In [23]:
def pdf_read(url):
    """Extract the text of every page of a PDF document.

    Parameters
    ----------
    url : str
        Location of the PDF; handed to ``PdfReader`` unchanged.

    Returns
    -------
    str
        The text of each page, in page order, joined with newlines. For a
        single-page document this is exactly that page's text; a document
        with no pages yields an empty string.
    """
    reader = PdfReader(url)
    # Read all pages, not just the first, so multi-page resumes are not truncated.
    # A page that yields no text contributes an empty string.
    page_texts = [(page.extract_text() or "") for page in reader.pages]
    return "\n".join(page_texts)

# Path to the resume PDF, relative to the notebook's working directory.
url = './resume.pdf'
# Text returned by pdf_read for that file; later cells run the extractors on it.
resume = pdf_read(url)

In [24]:
import pickle
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load .pkl files that come from a trusted source.

# Load the skill vocabulary (iterated as regex search terms by extract_skills).
with open("./list/skills.pkl", "rb") as f:
    skills = pickle.load(f)
# Load the education vocabulary (extract_education reads its "degrees",
# "fields_of_study" and "institutions" entries).
with open("./list/education.pkl", "rb") as f:
    education = pickle.load(f)
# Load the experience vocabulary (iterated as regex search terms by extract_experience).
with open("./list/experience.pkl", "rb") as f:
    experience = pickle.load(f)

In [None]:
# Extract Skill
def extract_skills(text):
    """Return the entries of the global ``skills`` vocabulary found in ``text``.

    Each entry is matched literally and case-insensitively as a whole term.

    Parameters
    ----------
    text : str
        Text to scan (e.g. a resume or a job description).

    Returns
    -------
    list
        Matching vocabulary entries, in vocabulary order.
    """
    # re.escape keeps entries such as "C++" or "Node.js" from being read as
    # regex syntax. The lookarounds replace \b, which cannot match next to a
    # non-word character like the trailing "+" of "C++".
    return [
        skill for skill in skills
        if re.search(rf'(?<!\w){re.escape(str(skill))}(?!\w)', text, re.IGNORECASE)
    ]

#  Education Extraction Function
def extract_education(text):
    """Find the education-related vocabulary entries mentioned in ``text``.

    Reads the "degrees", "fields_of_study" and "institutions" entries of the
    global ``education`` mapping and matches each term literally and
    case-insensitively as a whole term.

    Parameters
    ----------
    text : str
        Text to scan.

    Returns
    -------
    dict
        ``{"Degrees": [...], "Fields of Study": [...], "Institutions": [...]}``
        with the matching terms of each category, in vocabulary order.

    Raises
    ------
    KeyError
        If ``education`` lacks one of the three expected keys.
    """
    def _present(terms):
        # Escape each term so characters like "." or "+" are matched literally.
        # The lookarounds replace \b, which cannot match next to a non-word character.
        return [
            term for term in terms
            if re.search(rf'(?<!\w){re.escape(str(term))}(?!\w)', text, re.IGNORECASE)
        ]

    degrees = education["degrees"]
    fields_of_study = education["fields_of_study"]
    institutions = education["institutions"]

    return {
        "Degrees": _present(degrees),
        "Fields of Study": _present(fields_of_study),
        "Institutions": _present(institutions)
    }

# Extract experience
def extract_experience(text):
    """Return the entries of the global ``experience`` vocabulary found in ``text``.

    Each entry is matched literally and case-insensitively as a whole term.

    Parameters
    ----------
    text : str
        Text to scan.

    Returns
    -------
    list
        Matching vocabulary entries, in vocabulary order.
    """
    # Escape each entry so parentheses, dots, plus signs, etc. are matched
    # literally. The lookarounds replace \b, which cannot match next to a
    # non-word character.
    return [
        exp for exp in experience
        if re.search(rf'(?<!\w){re.escape(str(exp))}(?!\w)', text, re.IGNORECASE)
    ]


In [218]:
# Apply on my resume
def extract_Resume(resume):
    """Run the three extractors on ``resume`` and bundle their results.

    Returns a dict with the keys 'skill', 'education' and 'experience',
    holding the outputs of extract_skills, extract_education and
    extract_experience respectively.
    """
    found_skills = extract_skills(resume)
    found_education = extract_education(resume)
    found_experience = extract_experience(resume)

    # Plain Python dict; no serialisation happens here.
    return dict(
        skill=found_skills,
        education=found_education,
        experience=found_experience,
    )

In [None]:
def extract_Resume(resume):
    """Summarise the extracted resume features as one sentence-like string.

    Runs extract_skills, extract_education and extract_experience on
    ``resume`` and returns
    "Skills: <comma-separated skills>. Education: <education>. Experience: <experience>."

    NOTE(review): this definition reuses the name of the dict-returning
    extract_Resume defined earlier in the notebook; whichever cell ran last
    wins. Cells that index the result by key need the dict version.
    """
    found_skills = extract_skills(resume)
    found_education = extract_education(resume)
    found_experience = extract_experience(resume)

    # Flatten everything into a single text blob
    skills_csv = ', '.join(found_skills)
    return f"Skills: {skills_csv}. Education: {found_education}. Experience: {found_experience}."

In [31]:
# extract_Resume(resume)

In [130]:
# Load the job-postings dataset; later cells use its 'Job Title' and
# 'Job Description' columns.
# NOTE(review): the preview shows 'Unnamed: 0.1' / 'Unnamed: 0' columns, which look
# like previously saved index columns — confirm whether they are needed.
df = pd.read_csv('./datasets/data.csv')
# Preview the first rows.
df.head()

Unnamed: 0.1,Unnamed: 0,Job Title,Job Description
0,0,Flutter Developer,We are looking for hire experts flutter develo...
1,1,Django Developer,PYTHON/DJANGO (Developer/Lead) - Job Code(PDJ ...
2,2,Machine Learning,"Data Scientist (Contractor)\n\nBangalore, IN\n..."
3,3,iOS Developer,JOB DESCRIPTION:\n\nStrong framework outside o...
4,4,Full Stack Developer,job responsibility full stack engineer – react...


In [194]:
# Show only the postings whose 'Job Title' is exactly 'Machine Learning'.
df[df['Job Title'] == 'Machine Learning']

Unnamed: 0.1,Unnamed: 0,Job Title,Job Description
2,2,Machine Learning,"Data Scientist (Contractor)\n\nBangalore, IN\n..."
11,11,Machine Learning,"Remote, Any where\n2 - 4 years\n₹8L - ₹12L / y..."
12,12,Machine Learning,"Excited by Big Data, Machine Learning and Pred..."
30,31,Machine Learning,Vista Equity Partners is the world's leading p...
33,34,Machine Learning,Staff Machine Learning Scientist\n\nHyderabad/...
...,...,...,...
2242,2367,Machine Learning,Your Profile:\nWill demonstrate cross function...
2259,2385,Machine Learning,Responsibilities\nStudy and transform data sci...
2261,2388,Machine Learning,Job Description\nAbout our client:\nOur Client...
2266,2393,Machine Learning,"PipeCandy is a 'one of its kind', 'data scienc..."


In [134]:
import re
def cleanResume(txt):
    """Normalise free text by stripping URLs, social-media tokens and symbols.

    Steps, in order:
      1. remove ``http...`` URLs,
      2. remove the standalone tokens ``RT`` and ``cc``,
      3. remove ``#hashtags`` and ``@mentions``,
      4. replace ASCII punctuation with spaces,
      5. replace non-ASCII characters with spaces,
      6. collapse every whitespace run to a single space.

    Parameters
    ----------
    txt : str
        Raw text.

    Returns
    -------
    str
        Cleaned text. Leading/trailing single spaces are not stripped.
    """
    # No trailing \s requirement, so a URL at the very end of the text is removed too.
    cleanText = re.sub(r'http\S+', ' ', txt)
    # Word boundaries keep "cc"/"RT" inside ordinary words ("success", "SMART") intact.
    cleanText = re.sub(r'\b(?:RT|cc)\b', ' ', cleanText)
    # Same end-of-text consideration as for URLs.
    cleanText = re.sub(r'#\S+', ' ', cleanText)
    cleanText = re.sub(r'@\S+', '  ', cleanText)
    cleanText = re.sub('[%s]' % re.escape(r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""), ' ', cleanText)
    cleanText = re.sub(r'[^\x00-\x7f]', ' ', cleanText)
    cleanText = re.sub(r'\s+', ' ', cleanText)
    return cleanText

In [230]:
# Index label of the job posting to compare against.
value = 2
# Extract features from that posting's description and from the resume text.
# NOTE(review): df['Job Description'][value] looks the row up by index label —
# assumes the default integer index; confirm if the frame is ever re-indexed.
jobDescription = extract_Resume(df['Job Description'][value])
myResume = extract_Resume(resume)

In [231]:
# Show the skills found in the job description, then those found in the resume.
# Indexing by 'skill' requires the dict-returning version of extract_Resume.
print(jobDescription['skill'])
print(myResume['skill'])

['Java', 'PyTorch', 'Big Data', 'Deep Learning', 'Machine Learning', 'TensorFlow', 'Keras', 'Python']
['MLflow', 'NLP', 'Scikit-learn', 'Django', 'OpenCV', 'SQL', 'C', 'Data Analysis', 'Java', 'LangChain', 'Computer Vision', 'React', 'Flask', 'Deep Learning', 'Machine Learning', 'TensorFlow', 'Keras', 'C++', 'FastAPI', 'Python', 'CSS', 'Generative AI', 'HTML']


## **Apply Machine Learning Algorithm**

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [208]:
# TF-IDF vectorizer created with its default settings.
vector = TfidfVectorizer()

In [209]:
# Fit on the two documents: row 0 is the resume, row 1 the job description.
# NOTE(review): the vectorizer needs text inputs, while another cell indexes
# myResume/jobDescription by ['skill']; this cell matches the string-returning
# version of extract_Resume — confirm which definition is active when running top to bottom.
tfidf_vector = vector.fit_transform([myResume,jobDescription])

In [210]:
# Cosine similarity between row 0 and row 1 of the TF-IDF matrix; the slices keep both operands 2-D.
cosine_sim = cosine_similarity(tfidf_vector[0:1], tfidf_vector[1:2])

In [211]:
# cosine_sim is a 1x1 matrix; report its single value as a percentage.
print(f"Resume-Job Match Score: {cosine_sim[0][0] * 100:.2f}%")

Resume-Job Match Score: 38.53%


In [223]:
def skill_match_score(resume_skills, job_skills):
    """Score how many of the job's skills also appear among the resume's skills.

    Parameters
    ----------
    resume_skills : iterable of hashable
        Skills found in the resume.
    job_skills : iterable of hashable
        Skills required by the job posting.

    Returns
    -------
    tuple
        ``(match_score, intersection)`` where ``match_score`` is the percentage
        (0-100) of distinct job skills present in the resume, and
        ``intersection`` is the set of shared skills. Comparison is exact
        (case-sensitive). When the job lists no skills, the score is 0.0 and
        the set is empty.
    """
    # Deduplicate both sides
    resume_set = set(resume_skills)
    job_set = set(job_skills)

    # Nothing to match against: avoid dividing by zero.
    if not job_set:
        return 0.0, set()

    intersection = resume_set.intersection(job_set)
    match_score = (len(intersection) / len(job_set)) * 100  # Match based on job skills only

    return match_score, intersection

In [232]:
# Compare the resume's extracted skills with the job posting's extracted skills.
score, matched_skills = skill_match_score(myResume['skill'], jobDescription['skill'])

# Output result
print(f"Job Skill Match Score: {score:.2f}%")
# matched_skills is a set, whose iteration order can differ between runs;
# sort it so the printed list is reproducible.
print(f"Matched Skills: {', '.join(sorted(matched_skills))}")

Job Skill Match Score: 75.00%
Matched Skills: Java, Keras, Deep Learning, Python, Machine Learning, TensorFlow
