In [17]:



import pdfplumber
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Load the labelled dataset, keeping only the two columns used below and
# discarding any row where either of them is missing.
df = pd.read_excel("Resume_Parser_Dataset.xlsx")
df = df.loc[:, ["Skills", "Best_Matched_Role"]].dropna()

# Build one "profile" document per role by space-joining the Skills strings
# of every row labelled with that role. The result is a DataFrame with the
# columns "Best_Matched_Role" and "Skills".
skills_by_role = df.groupby("Best_Matched_Role")["Skills"]
role_profiles = skills_by_role.agg(" ".join).reset_index()



def extract_resume_text(pdf_path):
    """Return the text of every page of a PDF, joined by newlines.

    Args:
        pdf_path: Path of the PDF, passed straight to ``pdfplumber.open``.

    Returns:
        The text of all pages that yielded any text, separated by newlines,
        or an empty string when no page yielded text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # Extract each page exactly once; the call is costly and its result
        # is needed both for the emptiness check and for the output.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Skip pages with a falsy result, and separate the remaining pages with a
    # newline so the last word of one page is not fused with the first word
    # of the next (which would create a bogus token for the vectorizer).
    return "\n".join(text for text in page_texts if text)

# Score the resume in "resume.pdf" against every role profile and report the
# roles from best to worst match.
resume_text = extract_resume_text("resume.pdf")


# Document 0 is the resume; documents 1..n are the role profiles, in the same
# order as the rows of ``role_profiles``.
documents = [resume_text] + role_profiles["Skills"].tolist()

vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(documents)


# Compare the resume row against all profile rows; the result has a single
# row (one entry per role profile), which is what [0] selects.
similarity_scores = cosine_similarity(
    tfidf_matrix[0:1],
    tfidf_matrix[1:]
)[0]

# Assigned positionally, so score i belongs to role profile i.
role_profiles["Matching_Score (%)"] = similarity_scores * 100


result = role_profiles.sort_values(
    by="Matching_Score (%)",
    ascending=False
)



print("\n Resume Job Matching Results\n")

# Only two columns are needed, so iterate over them directly instead of
# materialising a Series per row with iterrows().
for role, score in zip(
    result["Best_Matched_Role"], result["Matching_Score (%)"]
):
    print(f"{role} : {score:.2f}%")

print("\n Best Suitable Job Role:")
best_match = result.iloc[0]
if best_match["Matching_Score (%)"] > 0:
    print(best_match["Best_Matched_Role"])
else:
    # A top score of zero means the resume shares no terms with any role
    # profile (for example, when no text could be extracted from the PDF).
    # Naming a "best" role here would be arbitrary and misleading.
    print("No suitable role found (resume shares no terms with any role profile)")



 Resume Job Matching Results

Developer : 3.95%
QA : 3.66%
HR : 3.47%
Product Owner : 3.43%
UX Designer : 2.91%

 Best Suitable Job Role:
Developer
