In [3]:
import nltk

In [4]:
import os
import pdfplumber
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import spacy
import tkinter as tk
from tkinter import filedialog, messagebox

nlp = spacy.load('en_core_web_sm')

# Function to extract text from a PDF file
def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        return ''.join(page.extract_text() for page in pdf.pages if page.extract_text())

# Function for text preprocessing
def preprocess(text):
    tokens = word_tokenize(text.lower())
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word.isalnum() and word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    return ' '.join(tokens)

# Function to calculate similarity using keyword matching
def calculate_similarity(job_requirements, resume_text):
    vectorizer = CountVectorizer(binary=True, stop_words=ENGLISH_STOP_WORDS)
    job_vector = vectorizer.fit_transform([job_requirements])
    resume_vector = vectorizer.transform([resume_text])
    similarity = cosine_similarity(job_vector, resume_vector)[0][0]
    return similarity

# Function to extract information from resume text
def extract_requirements_from_resume(resume_text):
    doc = nlp(resume_text.lower())
    extracted_info = {
        "sql": False,
        "javascript": False,
        "html": False,
        "java": False,
        "communication skill": False,
        "project management": False
    }
    for sentence in doc.sents:
        processed_sentence = preprocess(sentence.text)
        if any(keyword in processed_sentence for keyword in ["sql", "javascript", "html","java","communication skill","project management"]):
            extracted_info["sql"] = True
        if "javascript" in processed_sentence:
            extracted_info["javascript"] = True
        if "html" in processed_sentence:
            extracted_info["html"] = True
        if "java" in processed_sentence:
            extracted_info["java"] = True
        if "communication skill" in processed_sentence:
            extracted_info["communication skill"] = True
        if "project management" in processed_sentence:
            extracted_info["project management"] = True
    return extracted_info


# Function to handle file selection and display results
def process_files():
    job_requirements = entry_job_requirements.get()
    if not job_requirements:
        messagebox.showerror("Error", "Please enter job requirements.")
        return

    resume_folder_path = filedialog.askdirectory()
    if not resume_folder_path:
        return

    similarity_scores = []
    extracted_skills = []

    for filename in os.listdir(resume_folder_path):
        if filename.endswith(".pdf"):
            pdf_path = os.path.join(resume_folder_path, filename)
            resume_text = extract_text_from_pdf(pdf_path)
            processed_resume = preprocess(resume_text)
            similarity = calculate_similarity(job_requirements, processed_resume)
            similarity_scores.append((pdf_path, similarity * 100))
            extracted_info = extract_requirements_from_resume(resume_text)
            extracted_skills.append((pdf_path, extracted_info))

    # Update result_text with similarity scores for all resumes
    result_text = "Similarity Scores for All Resumes:\n"
    for score in similarity_scores:
        result_text += f"Resume: {score[0]}, Similarity Score: {score[1]:.2f}%\n"

    threshold = 70
    top_matches = [score for score in similarity_scores if score[1] >= threshold]

    if not top_matches:
        result_text += "\nNo matching resumes found."
    else:
        result_text += "\n\nShortlisted Resumes:\n"
        for match in top_matches:
            result_text += f"Resume: {match[0]}, Similarity Score: {match[1]:.2f}%\n"

    text_result.config(state="normal")
    text_result.delete(1.0, "end")
    text_result.insert("end", result_text)
    text_result.config(state="disabled")

    for resume, skills in extracted_skills:
        print(f"\nExtracted skills from {resume}:")
        for skill, present in skills.items():
            print(f"{skill.capitalize()}: {'Present' if present else 'Absent'}")


root = tk.Tk()
root.title("Resume Matcher")

frame = tk.Frame(root)
frame.pack(padx=10, pady=10)

label_job_requirements = tk.Label(frame, text="Enter Job Requirements:")
label_job_requirements.grid(row=0, column=0, sticky="w")

entry_job_requirements = tk.Entry(frame, width=50)
entry_job_requirements.grid(row=0, column=1)

button_process = tk.Button(frame, text="Process Resumes", command=process_files)
button_process.grid(row=1, column=0, columnspan=2, pady=10)

text_result = tk.Text(frame, height=30, width=60, state="disabled")
text_result.grid(row=2, column=0, columnspan=2)

root.mainloop()




Extracted skills from C:/Users/DELL/Documents/intern/Task4/resumes\10089434.pdf:
Sql: Present
Javascript: Absent
Html: Present
Java: Present
Communication skill: Absent
Project management: Absent

Extracted skills from C:/Users/DELL/Documents/intern/Task4/resumes\10247517.pdf:
Sql: Present
Javascript: Absent
Html: Absent
Java: Absent
Communication skill: Absent
Project management: Present

Extracted skills from C:/Users/DELL/Documents/intern/Task4/resumes\11187796.pdf:
Sql: Absent
Javascript: Absent
Html: Absent
Java: Absent
Communication skill: Absent
Project management: Absent

Extracted skills from C:/Users/DELL/Documents/intern/Task4/resumes\94230796.pdf:
Sql: Present
Javascript: Absent
Html: Absent
Java: Absent
Communication skill: Present
Project management: Absent

Extracted skills from C:/Users/DELL/Documents/intern/Task4/resumes\VIDHI_RESUME.pdf:
Sql: Present
Javascript: Present
Html: Present
Java: Present
Communication skill: Present
Project management: Absent
