In [3]:
import streamlit as st
from PyPDF2 import PdfReader
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: A path or binary file-like object that ``PdfReader`` accepts.

    Returns:
        str: The text of all pages that produced any text, joined with
        newlines. An empty string if no page yields text.
    """
    pdf = PdfReader(file)
    # Pages are joined with a newline so the last word of one page does not
    # fuse with the first word of the next (which would create bogus tokens
    # for downstream text vectorisation). Pages whose extraction yields
    # nothing (None or "") are skipped instead of breaking the concatenation.
    page_texts = (page.extract_text() for page in pdf.pages)
    return "\n".join(text for text in page_texts if text)

In [6]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def rank_resumes(job_description, resumes):
    """
    Score resumes by their similarity to a job description using TF-IDF and cosine similarity.

    The job description and all resumes are vectorised together so they share
    one vocabulary; each resume vector is then compared against the job
    description vector.

    Args:
        job_description (str): The text of the job description.
        resumes (iterable of str): The resume texts (a list, tuple, generator, ...).

    Returns:
        numpy.ndarray: A 1-D array with one cosine similarity score per resume,
        in the same order as ``resumes``. Empty (shape ``(0,)``) when no
        resumes are given.
    """
    resumes = list(resumes)  # accept any iterable, not only lists
    if not resumes:
        # cosine_similarity rejects a matrix with zero rows, so answer directly.
        return np.empty(0, dtype=float)

    documents = [job_description] + resumes
    vectorizer = TfidfVectorizer()
    # Keep the TF-IDF matrix sparse: cosine_similarity accepts sparse input and
    # still returns a dense array, so densifying with .toarray() only costs memory.
    vectors = vectorizer.fit_transform(documents)

    # Row 0 is the job description; slicing (0:1) keeps it two-dimensional.
    job_description_vector = vectors[0:1]
    resume_vectors = vectors[1:]

    return cosine_similarity(job_description_vector, resume_vectors).flatten()

# Demo: score three hand-written resumes against a sample job description.

job_desc = "Looking for a software engineer with strong Python and machine learning skills."
resume1 = "Experienced software engineer with 5 years of Python development and machine learning projects."
resume2 = "A marketing professional with experience in social media management."
resume3 = "Python developer with 2 years of experience."

resume_list = [resume1, resume2, resume3]
similarities = rank_resumes(job_desc, resume_list)

# Show the raw score array first, then one line per resume (1-based numbering).
print("Cosine Similarities:", similarities)
for i, similarity in enumerate(similarities):
    print(f"Resume {i+1} similarity: {similarity}")

# Pair each resume with its score as a list of records.
# Note: the records keep the order of resume_list; they are not sorted by score.
ranked_resumes = [
    {"resume_text": resume_text, "similarity": score}
    for resume_text, score in zip(resume_list, similarities)
]

print(ranked_resumes)

Cosine Similarities: [0.47894847 0.0371655  0.12937591]
Resume 1 similarity: 0.47894847126501505
Resume 2 similarity: 0.03716549903926591
Resume 3 similarity: 0.12937591198581325
[{'resume_text': 'Experienced software engineer with 5 years of Python development and machine learning projects.', 'similarity': 0.47894847126501505}, {'resume_text': 'A marketing professional with experience in social media management.', 'similarity': 0.03716549903926591}, {'resume_text': 'Python developer with 2 years of experience.', 'similarity': 0.12937591198581325}]


In [7]:
# Streamlit UI: page title
st.title("AI Resume Screening & Candidate Ranking System")

# Free-text box for the job description; the entered text is kept in
# `job_description` for the ranking step.
st.header("Job Description")
job_description = st.text_area("Enter the job description")

2025-03-31 08:51:19.026 
  command:

    streamlit run d:\a\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-03-31 08:51:19.036 Session state does not function when running a script without `streamlit run`


In [8]:
# Resume upload widget: restricted to PDF files, several may be selected at once.
st.header("Upload Resumes")
uploaded_files = st.file_uploader(
    "Upload PDF files",
    type=["pdf"],
    accept_multiple_files=True,
)



In [9]:
# Only rank once both inputs are present: at least one uploaded file and a
# non-empty job description.
if uploaded_files and job_description:
    st.header("Ranking Resumes")

    # Extract the text of every uploaded PDF, keeping upload order so that
    # each score lines up with its file name below.
    resumes = [extract_text_from_pdf(file) for file in uploaded_files]

    # One similarity score per resume, in the same order as `resumes`.
    scores = rank_resumes(job_description, resumes)

    # Build the results table. The "Score" column must use `scores` (the
    # array computed above); a misspelt name here raises NameError.
    results = pd.DataFrame(
        {"Resume": [file.name for file in uploaded_files], "Score": scores}
    )
    # Best matches first.
    results = results.sort_values(by="Score", ascending=False)

    st.write(results)