In [1]:
# Experimental cell: trying out a new method.

import re
from docx import Document
from sentence_transformers import SentenceTransformer, util

# Function to clean and remove personal information like name, email, phone, dates
def remove_personal_info(text):
    """Remove personal details and boilerplate headings from free text.

    The substitutions below run in order. Each one is a regex heuristic, so
    both misses and over-matches are possible:

    1. Lines made up of exactly two capitalised words (e.g. "John Doe").
    2. E-mail addresses.
    3. Stand-alone 10-digit numbers (phone numbers).
    4. Everything from a Contact/Email/Phone/Name/Address keyword to the end
       of its line.
    5. Month-prefixed year ranges such as "Jan 2010-2012" or
       "January 2010-2012".
    6. Everything from a section heading (Work Experience, Education, Skills,
       Certifications, References) to the end of its line.

    Args:
        text: Raw text, typically one paragraph per line.

    Returns:
        The text with every matched span deleted; everything else is unchanged.
    """
    # re.MULTILINE lets ^/$ anchor on each line; without it the pattern only
    # matched when the whole document was a name. [ \t]+ instead of \s keeps
    # the match on one line so two single-word lines are not merged into a "name".
    text = re.sub(r'^[A-Z][a-z]+[ \t]+[A-Z][a-z]+$', '', text, flags=re.MULTILINE)  # Example: "John Doe"
    # The TLD class is [A-Za-z]; a "|" inside a character class is a literal
    # pipe, not alternation.
    text = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,7}\b', '', text)  # Remove emails
    text = re.sub(r'\b\d{10}\b', '', text)  # Remove phone numbers
    # (?:\n|$) also removes a header that sits on the last line with no
    # trailing newline.
    text = re.sub(r'\b(?:Contact|Email|Phone|Name|Address)\b.*(?:\n|$)', '', text)  # Remove headers
    # [a-zA-Z]* (not +) so bare abbreviations ("Jan", "May") match as well as
    # full month names.
    text = re.sub(r'\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-zA-Z]* \d{4}-\d{4}', '', text)  # Remove date ranges
    text = re.sub(r'\b(?:Work Experience|Education|Skills|Certifications|References)\b.*(?:\n|$)', '', text)  # Remove section headers
    return text

# Function to read text from a .docx file
def read_docx(file_path):
    """Return the text of a .docx file with every paragraph newline-terminated.

    Args:
        file_path: Path handed to ``Document`` to open the file.

    Returns:
        A single string: each paragraph's text followed by a newline, in
        document order. A document without paragraphs yields an empty string.
    """
    paragraphs = Document(file_path).paragraphs
    # Join once instead of growing a string inside a loop.
    return "".join(f"{paragraph.text}\n" for paragraph in paragraphs)

# Function to calculate similarity score using a Sentence-Transformers model
def calculate_similarity(cv_text, jd_text, model=None):
    """Score how similar a CV is to a job description.

    Both texts are embedded as whole documents and compared with
    ``util.pytorch_cos_sim``.

    Args:
        cv_text: Full CV text.
        jd_text: Full job-description text.
        model: Optional, already-constructed embedding model exposing
            ``encode(list_of_str, convert_to_tensor=True)``. Passing one lets
            callers reuse a single loaded model across calls. When omitted, a
            fresh ``SentenceTransformer('all-MiniLM-L6-v2')`` is created, as
            before.

    Returns:
        The similarity score as a plain Python number (the ``.item()`` of the
        1x1 similarity result).
    """
    if model is None:
        # Constructing the model is the expensive step; callers that score
        # several documents should build it once and pass it in.
        model = SentenceTransformer('all-MiniLM-L6-v2')  # Pre-trained model for embeddings
    embeddings = model.encode([cv_text, jd_text], convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
    return similarity_score

# Function to extract key phrases from the CV and JD using embeddings
def extract_key_phrases(text, model=None):
    """Split text into lines and embed each line.

    Despite the name, no phrase extraction happens here: the text is split on
    newline characters and every resulting line (including empty ones) is
    encoded.

    Args:
        text: Text to split and embed.
        model: Optional, already-constructed embedding model exposing
            ``encode(list_of_str, convert_to_tensor=True)``. Passing one avoids
            reloading the model on every call. When omitted, a fresh
            ``SentenceTransformer('all-MiniLM-L6-v2')`` is created, as before.

    Returns:
        A ``(embeddings, sentences)`` pair where ``sentences`` is the list of
        lines and ``embeddings`` is whatever ``model.encode`` returns for that
        list; entry *i* of one corresponds to entry *i* of the other.
    """
    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')
    sentences = text.split('\n')
    embeddings = model.encode(sentences, convert_to_tensor=True)
    return embeddings, sentences

# Function to generate actionable recommendations
def generate_recommendations(cv_embeddings, jd_embeddings, cv_sentences, jd_sentences, threshold=0.6):
    """Flag CV lines whose best match in the job description is weak.

    For every CV line, the similarity to each job-description embedding is
    computed with ``util.pytorch_cos_sim``. If the highest of those scores is
    below ``threshold``, a recommendation naming that line is produced.

    Args:
        cv_embeddings: Iterable of embeddings, one per entry of ``cv_sentences``.
        jd_embeddings: Iterable of job-description embeddings.
        cv_sentences: CV lines, aligned with ``cv_embeddings``.
        jd_sentences: Job-description lines. Currently unused; kept so existing
            callers keep working.
        threshold: Scores strictly below this value count as a weak match.
            Defaults to 0.6, the previously hard-coded value.

    Returns:
        A list of recommendation strings in CV order without duplicates, or a
        single "well-aligned" message when nothing was flagged.
    """
    recommendations = []
    added_recommendations = set()  # Track recommendations to avoid duplicates

    for cv_emb, sentence in zip(cv_embeddings, cv_sentences):
        # Blank lines (empty paragraphs) carry no content; recommending them
        # would print a message that names nothing.
        if not sentence.strip():
            continue
        # Only flagged lines are ever stored, so a repeat of one can be skipped
        # before paying for the similarity computation again.
        if sentence in added_recommendations:
            continue

        # default=-inf: with no job-description embeddings nothing can match,
        # and max() on an empty sequence would otherwise raise ValueError.
        max_similarity = max(
            (util.pytorch_cos_sim(cv_emb, jd_emb).item() for jd_emb in jd_embeddings),
            default=float("-inf"),
        )

        # If similarity is low, recommend improving or adding that part of the CV
        if max_similarity < threshold:
            recommendations.append(f"Consider adding or improving: {sentence}")
            added_recommendations.add(sentence)

    if not recommendations:
        recommendations.append("Your CV is well-aligned with the job description.")

    return recommendations

# Function to generate technology-specific recommendations
def generate_technology_recommendations(cv_text, job_description_text, keywords=None):
    """Compare which technology keywords appear in the CV and the job description.

    Matching is case-insensitive and token-bounded: a keyword only counts when
    it is not glued to a preceding letter/digit or a following letter. So
    "javascript" no longer counts as "java" and "laws" no longer counts as
    "aws", while "Python3" or "C++," still match.

    Args:
        cv_text: CV text to scan.
        job_description_text: Job-description text to scan.
        keywords: Optional iterable of keywords to look for. Defaults to the
            built-in IT keyword list.

    Returns:
        A list of strings: first an "adding" recommendation for each keyword
        found in the job description but not in the CV, then a "removing"
        recommendation for each keyword found in the CV but not in the job
        description. Both follow keyword-list order.
    """
    if keywords is None:
        keywords = ['python', 'java', 'c++', 'blockchain', 'cloud', 'azure', 'aws', 'docker', 'devops', 'kubernetes', 'data science', 'linux', 'javascript']
    it_keywords = [keyword.lower() for keyword in keywords]

    def _mentioned(text):
        # Lookarounds instead of \b because \b does not work next to the "+"
        # characters in "c++"; re.escape keeps those "+" literal.
        lowered = text.lower()
        return [
            keyword for keyword in it_keywords
            if re.search(r'(?<![a-z0-9])' + re.escape(keyword) + r'(?![a-z])', lowered)
        ]

    cv_technologies = _mentioned(cv_text)
    jd_technologies = _mentioned(job_description_text)

    recommendations = []

    # Recommend adding missing technologies from JD to CV
    for tech in jd_technologies:
        if tech not in cv_technologies:
            recommendations.append(f"Consider adding experience with {tech.capitalize()}.")

    # Recommend removing irrelevant technologies from CV
    for tech in cv_technologies:
        if tech not in jd_technologies:
            recommendations.append(f"Consider removing experience with {tech.capitalize()}.")

    return recommendations

# Main function to analyze CV and Job Description using pre-trained models
def analyze_cv_job(cv_text, job_description_text):
    """Run the full CV-versus-job-description analysis.

    Both texts are cleaned with ``remove_personal_info`` first. The cleaned
    texts then feed the overall similarity score, the per-line
    recommendations and the technology keyword recommendations.

    Args:
        cv_text: Raw CV text.
        job_description_text: Raw job-description text.

    Returns:
        ``(similarity_score, all_recommendations)``: the overall score and the
        line-level recommendations followed by the technology recommendations.
    """
    # Cleaning: CV first, then the job description.
    cleaned_cv, cleaned_jd = (
        remove_personal_info(raw_text) for raw_text in (cv_text, job_description_text)
    )

    # Whole-document score.
    overall_score = calculate_similarity(cleaned_cv, cleaned_jd)

    # Per-line embeddings for each side.
    cv_vectors, cv_lines = extract_key_phrases(cleaned_cv)
    jd_vectors, jd_lines = extract_key_phrases(cleaned_jd)

    # Line-level findings come first, technology add/remove hints after them.
    line_level = generate_recommendations(cv_vectors, jd_vectors, cv_lines, jd_lines)
    technology_level = generate_technology_recommendations(cleaned_cv, cleaned_jd)

    return overall_score, line_level + technology_level

# Example usage
cv_file_path = 'resume.docx'  # Replace with your CV file path
jd_file_path = 'job_description.docx'  # Replace with your Job Description file path

# Load both documents (CV first, then the job description).
cv_text, job_description_text = (
    read_docx(document_path) for document_path in (cv_file_path, jd_file_path)
)

# Score the pair and collect the recommendations.
similarity_score, recommendations = analyze_cv_job(cv_text, job_description_text)

# Report: the score to two decimals, then one recommendation per line.
print(f"Similarity Score: {similarity_score:.2f}")
print("Recommendations:")
for rec in recommendations:
    print(rec)


Similarity Score: 0.50
Recommendations:
Consider adding or improving: Summary
Consider adding or improving: Highlights
Consider adding or improving: Active Directory Group Policy Objects
Consider adding or improving: PowerShe l and VBScript Microsoft Exchange VMWare experience
Consider adding or improving: New technology and product research O fice 365 and Azure
Consider adding or improving: Storage management Enterprise backup management Disaster recovery
Consider adding or improving: Experience
Consider adding or improving: Information Technology Technician I Aug 2007 to Current Company  Migrating and managing user accounts in Microsoft O fice 365 and Exchange Online.
Consider adding or improving:  Creating and managing virtual machines for systems such as domain contro lers and Active Directory Federation Services (ADFS) in Microsoft Windows Azure (IaaS).
Consider adding or improving:  Creating and managing storage in Microsoft Windows Azure (IaaS).
Consider adding or improving:  In

In [6]:
import tkinter as tk
from tkinter import filedialog, messagebox
import re
from docx import Document
from sentence_transformers import SentenceTransformer, util

# Function to read text from a .docx file
def read_docx(file_path):
    """Return the text of a .docx file with paragraphs separated by newlines.

    Args:
        file_path: Path handed to ``Document`` to open the file.

    Returns:
        The paragraph texts joined with a newline between them, with no
        trailing newline. A document without paragraphs yields an empty string.
    """
    paragraph_texts = []
    for paragraph in Document(file_path).paragraphs:
        paragraph_texts.append(paragraph.text)
    return "\n".join(paragraph_texts)

# Function to analyze CV and Job Description
def analyze_files(cv_path, jd_path):
    """Compare a resume and a job description and show the score in a dialog.

    Args:
        cv_path: Path of the resume .docx file; a falsy value means none chosen.
        jd_path: Path of the job-description .docx file; a falsy value means
            none chosen.

    Returns:
        None. The outcome is reported through ``messagebox``: an error dialog
        when a path is missing or the analysis fails, otherwise an info dialog
        with the similarity score to two decimals.
    """
    if not cv_path or not jd_path:
        messagebox.showerror("Error", "Please upload both Resume and Job Description.")
        return

    try:
        cv_text = read_docx(cv_path)
        jd_text = read_docx(jd_path)

        model = SentenceTransformer('all-MiniLM-L6-v2')
        embeddings = model.encode([cv_text, jd_text], convert_to_tensor=True)
        similarity_score = util.pytorch_cos_sim(embeddings[0], embeddings[1]).item()
    except Exception as exc:
        # This runs as a button callback, so an uncaught exception would never
        # reach the user; an unreadable file or a failed model load is
        # reported in a dialog instead.
        messagebox.showerror("Error", f"Analysis failed: {exc}")
        return

    messagebox.showinfo("Analysis Complete", f"Similarity Score: {similarity_score:.2f}")

# GUI Setup
# Create the main window, then set its title text and its 500x300 geometry.
root = tk.Tk()
root.title("CV vs Job Description Comparator")
root.geometry("500x300")

# Selected file paths, kept at module level. They start as empty strings, are
# rebound by upload_cv() / upload_jd() through `global`, and are read when the
# "Start Analysis" button fires.
cv_path = ""
jd_path = ""

def upload_cv():
    """Ask the user for the resume file and remember the selection.

    Opens a file dialog filtered to ``*.docx``. When a file is chosen, the
    module-level ``cv_path`` is rebound to it and ``cv_label`` shows the file
    name (the part after the last "/"). When the dialog is cancelled, nothing
    changes, so an earlier selection is kept.
    """
    global cv_path
    selected_path = filedialog.askopenfilename(filetypes=[("Word Documents", "*.docx")])
    if not selected_path:
        # Cancelled: the dialog hands back an empty value. Keep the previous
        # choice instead of wiping it and showing "Uploaded: ".
        return
    cv_path = selected_path
    cv_label.config(text=f"Uploaded: {cv_path.split('/')[-1]}")

def upload_jd():
    """Ask the user for the job-description file and remember the selection.

    Opens a file dialog filtered to ``*.docx``. When a file is chosen, the
    module-level ``jd_path`` is rebound to it and ``jd_label`` shows the file
    name (the part after the last "/"). When the dialog is cancelled, nothing
    changes, so an earlier selection is kept.
    """
    global jd_path
    selected_path = filedialog.askopenfilename(filetypes=[("Word Documents", "*.docx")])
    if not selected_path:
        # Cancelled: the dialog hands back an empty value. Keep the previous
        # choice instead of wiping it and showing "Uploaded: ".
        return
    jd_path = selected_path
    jd_label.config(text=f"Uploaded: {jd_path.split('/')[-1]}")

# UI Elements
# Every widget is pack()ed right after it is created, so the statement order
# below is also the order in which widgets are added to the window.

# Resume section: caption, status label (its text is updated by upload_cv),
# and the button that opens the file picker.
tk.Label(root, text="Upload Resume (CV)").pack()
cv_label = tk.Label(root, text="No file uploaded", fg="gray")
cv_label.pack()
tk.Button(root, text="Upload Resume", command=upload_cv).pack()

# Job-description section: same three widgets, wired to upload_jd.
tk.Label(root, text="Upload Job Description").pack()
jd_label = tk.Label(root, text="No file uploaded", fg="gray")
jd_label.pack()
tk.Button(root, text="Upload Job Description", command=upload_jd).pack()

# The lambda looks up cv_path / jd_path when the button is clicked, not when
# the button is built, so the paths set by the upload callbacks are the ones
# handed to analyze_files.
tk.Button(root, text="Start Analysis", command=lambda: analyze_files(cv_path, jd_path)).pack()

# Enter the Tk event loop; the call does not return while the window is open.
root.mainloop()
