In [None]:
# AI-Powered Resume Screening System
# Streamlit Web App Version

# Run this app using:
# streamlit run app.py

#importing libraries
import os
import re
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import nltk
from nltk.corpus import stopwords


import nltk

# Make sure the NLTK stop-word corpus is available locally before it is read.
nltk.download('stopwords')

# English stop words filtered out by clean_text(). The function reads this
# module-level name, so it must be defined before any cell calls clean_text();
# without it a fresh kernel (Restart -> Run All) fails with NameError.
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# Text Cleaning Function

def clean_text(text, stopword_set=None):
    """Normalize free text: lowercase, keep letters only, drop stop words.

    Parameters
    ----------
    text : str or None
        Raw resume or job-description text. ``None`` and float NaN (a missing
        DataFrame cell) are treated as empty text.
    stopword_set : collection of str, optional
        Words to remove. When omitted, the notebook-level ``stop_words`` is
        used; if that name has not been defined yet, it is initialised once
        from NLTK's English stop-word list (``stopwords.words('english')``).

    Returns
    -------
    str
        The remaining lowercase tokens joined by single spaces.
    """
    global stop_words

    # Missing values have no .lower(); treat them as "no text" so that
    # applying this function to a column with gaps does not crash.
    if text is None or (isinstance(text, float) and text != text):
        return ''

    if stopword_set is None:
        try:
            stopword_set = stop_words
        except NameError:
            # No earlier cell defined the global: build it once and keep it
            # so later calls do not re-read the corpus.
            stop_words = set(stopwords.words('english'))
            stopword_set = stop_words

    # Lowercase, then turn every character that is not an ASCII letter or a
    # space into a space (punctuation and digits become token separators).
    letters_only = re.sub(r'[^a-zA-Z ]', ' ', text.lower())
    return ' '.join(tok for tok in letters_only.split() if tok not in stopword_set)

In [None]:
# Sample resume dataset
#
# A real pipeline would extract this text from PDF/DOCX files; a handful of
# hand-written one-line resumes stands in for that here.

resume_texts = [
    'Python developer with machine learning and data science experience',
    'Java backend developer with Spring Boot and microservices',
    'Data analyst skilled in SQL, Excel, and Power BI',
    'AI engineer with deep learning and NLP experience',
    'Manual tester with selenium and test cases',
    'Frontend developer skilled in HTML CSS JavaScript',
    'Statistician with R, Python and data analysis',
    'Cloud engineer with AWS and DevOps tools',
]
suitability_labels = [1, 0, 1, 1, 0, 0, 1, 0]  # 1 = Suitable, 0 = Not Suitable

data = {'resume_text': resume_texts, 'label': suitability_labels}

# Build the frame and add the normalized text in one step.
df = pd.DataFrame(data).assign(
    cleaned_resume=lambda frame: frame['resume_text'].apply(clean_text)
)
df

Unnamed: 0,resume_text,label,cleaned_resume
0,Python developer with machine learning and dat...,1,python developer machine learning data science...
1,Java backend developer with Spring Boot and mi...,0,java backend developer spring boot microservices
2,"Data analyst skilled in SQL, Excel, and Power BI",1,data analyst skilled sql excel power bi
3,AI engineer with deep learning and NLP experience,1,ai engineer deep learning nlp experience
4,Manual tester with selenium and test cases,0,manual tester selenium test cases
5,Frontend developer skilled in HTML CSS JavaScript,0,frontend developer skilled html css javascript
6,"Statistician with R, Python and data analysis",1,statistician r python data analysis
7,Cloud engineer with AWS and DevOps tools,0,cloud engineer aws devops tools


In [None]:
# Job description that the resumes are matched against.
# It goes through the same clean_text() normalization as the resumes.

job_description = (
    "Looking for a data scientist with Python, machine learning, NLP, and statistics"
)
job_description_clean = clean_text(job_description)
job_description_clean

'looking data scientist python machine learning nlp statistics'

In [None]:
# TF-IDF vectorization
#
# The vectorizer is fitted on the cleaned resumes only; the job description
# is then transformed with that already-fitted vectorizer.

vectorizer = TfidfVectorizer()

cleaned_resumes = df['cleaned_resume']
resume_vectors = vectorizer.fit_transform(cleaned_resumes)

job_documents = [job_description_clean]
job_vector = vectorizer.transform(job_documents)

In [None]:
# Resume matching: cosine similarity between each resume and the job description

similarity_scores = cosine_similarity(resume_vectors, job_vector)

# One job vector means one score column; store it as a flat column.
df['match_score'] = similarity_scores[:, 0]

# Best match first
ranked_resumes = df.sort_values('match_score', ascending=False)
ranked_resumes.loc[:, ['resume_text', 'match_score']]

Unnamed: 0,resume_text,match_score
0,Python developer with machine learning and dat...,0.650769
3,AI engineer with deep learning and NLP experience,0.380097
6,"Statistician with R, Python and data analysis",0.344278
2,"Data analyst skilled in SQL, Excel, and Power BI",0.105768
1,Java backend developer with Spring Boot and mi...,0.0
4,Manual tester with selenium and test cases,0.0
5,Frontend developer skilled in HTML CSS JavaScript,0.0
7,Cloud engineer with AWS and DevOps tools,0.0


In [None]:
# Train a logistic-regression classifier on the TF-IDF resume vectors

X, y = resume_vectors, df['label']

# Hold out 30% of the samples; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

model = LogisticRegression()
model.fit(X_train, y_train)

In [None]:
# Predict suitability for a new resume
#
# The new text goes through the same cleaning and the already-fitted
# vectorizer before it reaches the trained model.

new_resume = "Experienced data scientist skilled in Python, ML, deep learning, NLP"
new_resume_clean = clean_text(new_resume)
new_vector = vectorizer.transform([new_resume_clean])

prediction = model.predict(new_vector)
probability = model.predict_proba(new_vector)

# Label 1 means "Suitable" (see the sample dataset).
verdict = "Yes" if prediction[0] == 1 else "No"
print("Suitable:", verdict)
# Prints the full per-class probability array for the single input row.
print("Confidence:", probability)

Suitable: Yes
Confidence: [[0.31662482 0.68337518]]


In [None]:
# Final output summary

# ranked_resumes is sorted by match_score in descending order, so the first
# row is the best match.
top_match = ranked_resumes.iloc[0]

print("Top Matching Resume:")
print(top_match['resume_text'])
print("Match Score:", top_match['match_score'])

Top Matching Resume:
Python developer with machine learning and data science experience
Match Score: 0.6507688954250134
