In [12]:
import tkinter as tk
from tkinter import filedialog, scrolledtext, ttk, messagebox
import PyPDF2
import spacy
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import re
import threading
import time

# Global variables for models
nlp = None
grammar_model = None
similarity_model = None
models_loaded = False

# Define resume sections with better keywords
SECTION_TEMPLATES = {
    "Contact Information": ["phone", "email", "address", "linkedin", "contact", "mobile", "tel", "@"],
    "Professional Summary": ["summary", "profile", "objective", "about", "overview", "career goal"],
    "Education": ["education", "degree", "university", "college", "school", "bachelor", "master", "phd", "graduated", "gpa", "academic"],
    "Work Experience": ["experience", "employment", "work", "job", "position", "role", "company", "worked", "responsibilities", "achievements"],
    "Skills": ["skills", "technical", "proficient", "expertise", "programming", "languages", "tools", "technologies", "competencies"],
    "Projects": ["projects", "developed", "built", "created", "implemented", "designed", "research", "portfolio"],
    "Certifications": ["certification", "certified", "license", "credential", "certificate", "award", "recognition"],
    "Languages": ["languages", "fluent", "native", "bilingual", "multilingual", "spanish", "french", "german"]
}

# Improved weak phrases detection
WEAK_PHRASES = [
    'some', 'various', 'etc.', 'responsible for', 'duties include', 'tasks include',
    'helped with', 'assisted with', 'worked on', 'dealt with', 'handled',
    'many', 'several', 'numerous', 'stuff', 'things', 'good at', 'familiar with'
]

# Strong action words for suggestions
STRONG_ACTION_WORDS = [
    'achieved', 'implemented', 'developed', 'created', 'designed', 'managed',
    'led', 'improved', 'increased', 'reduced', 'optimized', 'streamlined',
    'collaborated', 'coordinated', 'executed', 'delivered', 'established',
    'initiated', 'launched', 'maintained', 'mentored', 'negotiated'
]

# ---------------- Model Loading Functions ----------------

def load_models():
    """Load all NLP models in a separate thread"""
    global nlp, grammar_model, similarity_model, models_loaded
    
    try:
        status_label.config(text="üîÑ Loading spaCy model...", fg="#fd7e14")
        root.update()
        
        # Load spaCy model
        nlp = spacy.load("en_core_web_sm")
        
        status_label.config(text="üîÑ Loading grammar correction model...", fg="#fd7e14")
        root.update()
        
        # Load grammar correction model
        grammar_model = pipeline("text2text-generation", model="prithivida/grammar_error_correcter_v1")
        
        status_label.config(text="üîÑ Loading similarity model...", fg="#fd7e14")
        root.update()
        
        # Load sentence transformer
        similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
        
        models_loaded = True
        status_label.config(text="‚úÖ All models loaded successfully! Ready to analyze.", fg="#198754")
        upload_button.config(state="normal")
        
    except Exception as e:
        messagebox.showerror("Model Loading Error", f"Failed to load models: {str(e)}")
        status_label.config(text="‚ùå Error loading models", fg="#dc3545")

def start_model_loading():
    """Start model loading in a separate thread"""
    thread = threading.Thread(target=load_models)
    thread.daemon = True
    thread.start()

# ---------------- Core Functions ----------------

def extract_text_from_pdf(file_path):
    """Extract text from PDF file"""
    text = ""
    try:
        with open(file_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            for page in reader.pages:
                content = page.extract_text()
                if content:
                    text += content + "\n"
    except Exception as e:
        messagebox.showerror("Error Reading PDF", f"Could not read PDF file: {str(e)}")
    return text.strip()

def correct_grammar(text):
    """Improve grammar correction with better chunking"""
    if not grammar_model:
        return text
    
    # Split into sentences
    sentences = re.split(r'(?<=[.!?])\s+', text)
    corrected_sentences = []
    
    for sentence in sentences:
        if len(sentence.strip()) > 0:
            try:
                # Clean the sentence
                cleaned = sentence.strip()
                if len(cleaned) > 5:  # Only process meaningful sentences
                    # Use the grammar model
                    result = grammar_model(cleaned, max_length=len(cleaned) + 50, do_sample=False)
                    if result and len(result) > 0:
                        corrected = result[0]['generated_text'].strip()
                        corrected_sentences.append(corrected)
                    else:
                        corrected_sentences.append(cleaned)
                else:
                    corrected_sentences.append(cleaned)
            except Exception as e:
                print(f"Grammar correction error: {e}")
                corrected_sentences.append(sentence.strip())
    
    return " ".join(corrected_sentences)

def detect_passive_sentences(text):
    """Detect passive voice sentences using spaCy"""
    if not nlp:
        return []
    
    doc = nlp(text)
    passive_sentences = []
    
    for sent in doc.sents:
        # Look for passive voice indicators
        has_passive = False
        for token in sent:
            # Check for auxiliary verbs with past participles
            if token.dep_ == "auxpass" or (token.lemma_ in ["be", "get"] and any(child.tag_ == "VBN" for child in token.children)):
                has_passive = True
                break
        
        if has_passive:
            passive_sentences.append(sent.text.strip())
    
    return passive_sentences

def detect_weak_phrases(text):
    """Detect weak/vague phrases in the text"""
    found_phrases = []
    text_lower = text.lower()
    
    for phrase in WEAK_PHRASES:
        if phrase.lower() in text_lower:
            found_phrases.append(phrase)
    
    return found_phrases

def detect_sections_improved(text):
    """Improved section detection using multiple methods"""
    detected_sections = {}
    text_lower = text.lower()
    
    # Method 1: Direct keyword matching
    for section, keywords in SECTION_TEMPLATES.items():
        score = 0
        for keyword in keywords:
            if keyword.lower() in text_lower:
                score += 1
        
        # If we found enough keywords, consider section present
        if score >= 1:  # Lowered threshold for better detection
            detected_sections[section] = score
    
    # Method 2: Header-based detection
    lines = text.split('\n')
    for i, line in enumerate(lines):
        line_clean = line.strip().lower()
        if len(line_clean) > 0:
            for section, keywords in SECTION_TEMPLATES.items():
                for keyword in keywords:
                    if keyword in line_clean and len(line_clean) < 50:  # Likely a header
                        detected_sections[section] = detected_sections.get(section, 0) + 2
    
    # Method 3: Semantic similarity (if model is available)
    if similarity_model:
        try:
            paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
            for section, keywords in SECTION_TEMPLATES.items():
                if section not in detected_sections:
                    section_desc = " ".join(keywords)
                    section_embedding = similarity_model.encode(section_desc)
                    
                    max_similarity = 0
                    for para in paragraphs:
                        if len(para) > 20:  # Skip very short paragraphs
                            para_embedding = similarity_model.encode(para)
                            similarity = similarity_model.similarity([section_embedding], [para_embedding])[0][0]
                            max_similarity = max(max_similarity, float(similarity))
                    
                    if max_similarity > 0.3:  # Lowered threshold
                        detected_sections[section] = max_similarity
        except Exception as e:
            print(f"Similarity detection error: {e}")
    
    return detected_sections

def get_improvement_suggestions(text, detected_sections, passive_sentences, weak_phrases):
    """Generate improvement suggestions"""
    suggestions = []
    
    # Word count suggestions
    word_count = len(text.split())
    if word_count < 200:
        suggestions.append("üìù Add more detail to your experiences and achievements (current: {} words)".format(word_count))
    elif word_count > 800:
        suggestions.append("‚úÇÔ∏è Consider condensing content to make it more concise (current: {} words)".format(word_count))
    
    # Section suggestions
    missing_sections = []
    for section in SECTION_TEMPLATES:
        if section not in detected_sections:
            missing_sections.append(section)
    
    if missing_sections:
        suggestions.append("üìÇ Add missing sections: {}".format(', '.join(missing_sections)))
    
    # Passive voice suggestions
    if passive_sentences:
        suggestions.append("üé≠ Replace {} passive voice sentences with active voice for stronger impact".format(len(passive_sentences)))
    
    # Weak phrases suggestions
    if weak_phrases:
        suggestions.append("üí™ Replace weak phrases like '{}' with stronger action words".format(', '.join(weak_phrases[:3])))
    
    # Action words suggestion
    action_word_count = sum(1 for word in STRONG_ACTION_WORDS if word in text.lower())
    if action_word_count < 5:
        suggestions.append("üöÄ Use more strong action words (achieved, implemented, developed, managed, etc.)")
    
    # Quantification suggestion
    numbers = re.findall(r'\d+', text)
    if len(numbers) < 3:
        suggestions.append("üìä Add more quantifiable achievements (numbers, percentages, metrics)")
    
    # Contact information check
    if "Contact Information" not in detected_sections:
        suggestions.append("üìû Ensure your contact information (phone, email, LinkedIn) is clearly visible")
    
    # Skills section enhancement
    if "Skills" in detected_sections:
        suggestions.append("üõ†Ô∏è Group technical skills by category (Programming, Tools, Languages, etc.)")
    
    return suggestions

def calculate_resume_score(text, detected_sections, passive_sentences, weak_phrases):
    """Calculate comprehensive resume score"""
    score = 100.0
    
    # Word count scoring (20 points)
    word_count = len(text.split())
    if word_count < 150:
        score -= 15
    elif word_count < 250:
        score -= 8
    elif word_count > 800:
        score -= 5
    
    # Section completeness scoring (30 points)
    section_score = (len(detected_sections) / len(SECTION_TEMPLATES)) * 30
    score = score - 30 + section_score
    
    # Passive voice penalty (15 points)
    passive_penalty = min(len(passive_sentences) * 2, 15)
    score -= passive_penalty
    
    # Weak phrases penalty (15 points)
    weak_penalty = min(len(weak_phrases) * 3, 15)
    score -= weak_penalty
    
    # Action words bonus (10 points)
    action_words = sum(1 for word in STRONG_ACTION_WORDS if word in text.lower())
    action_bonus = min(action_words * 1, 10)
    score += action_bonus - 10  # Start with -10, add back based on action words
    
    # Quantification bonus (10 points)
    numbers = re.findall(r'\d+', text)
    quant_bonus = min(len(numbers) * 2, 10)
    score += quant_bonus - 10  # Start with -10, add back based on numbers
    
    return max(0, min(100, round(score, 1)))

# ---------------- GUI Functions ----------------

def upload_file():
    """Handle file upload and analysis"""
    if not models_loaded:
        messagebox.showwarning("Models Not Loaded", "Please wait for models to load before uploading.")
        return
    
    file_path = filedialog.askopenfilename(
        title="Select Resume PDF",
        filetypes=[("PDF Files", "*.pdf"), ("All Files", "*.*")]
    )
    
    if not file_path:
        return
    
    try:
        # Update status
        status_label.config(text="üìÑ Analyzing resume... Please wait.", fg="#fd7e14")
        root.update()
        
        # Extract text
        raw_text = extract_text_from_pdf(file_path)
        if not raw_text:
            messagebox.showerror("Error", "Could not extract text from PDF. Please ensure it's a valid PDF file.")
            return
        
        # Display original text
        text_area.delete(1.0, tk.END)
        text_area.insert(tk.END, raw_text)
        
        # Grammar correction
        status_label.config(text="‚úçÔ∏è Correcting grammar...", fg="#fd7e14")
        root.update()
        corrected_text = correct_grammar(raw_text)
        
        correction_output.config(state="normal")
        correction_output.delete(1.0, tk.END)
        correction_output.insert(tk.END, corrected_text)
        correction_output.config(state="disabled")
        
        # Detect passive sentences
        passive_sentences = detect_passive_sentences(raw_text)
        passive_output.config(state="normal")
        passive_output.delete(1.0, tk.END)
        if passive_sentences:
            passive_output.insert(tk.END, "Found {} passive voice sentences:\n\n".format(len(passive_sentences)))
            for i, sentence in enumerate(passive_sentences, 1):
                passive_output.insert(tk.END, f"{i}. {sentence}\n\n")
        else:
            passive_output.insert(tk.END, "‚úÖ Excellent! No passive voice detected in your resume.")
        passive_output.config(state="disabled")
        
        # Detect weak phrases
        weak_phrases = detect_weak_phrases(raw_text)
        weak_output.config(state="normal")
        weak_output.delete(1.0, tk.END)
        if weak_phrases:
            weak_output.insert(tk.END, "Found {} weak phrases:\n\n".format(len(weak_phrases)))
            for i, phrase in enumerate(weak_phrases, 1):
                weak_output.insert(tk.END, f"{i}. '{phrase}'\n")
        else:
            weak_output.insert(tk.END, "‚úÖ Great! No weak phrases detected.")
        weak_output.config(state="disabled")
        
        # Detect sections
        detected_sections = detect_sections_improved(raw_text)
        section_output.config(state="normal")
        section_output.delete(1.0, tk.END)
        
        present_sections = list(detected_sections.keys())
        missing_sections = [sec for sec in SECTION_TEMPLATES if sec not in detected_sections]
        
        if present_sections:
            section_output.insert(tk.END, "‚úÖ DETECTED SECTIONS ({}/{}):".format(len(present_sections), len(SECTION_TEMPLATES)))
            section_output.insert(tk.END, "\n" + "="*50 + "\n")
            for section in present_sections:
                section_output.insert(tk.END, f"‚Ä¢ {section}\n")
        
        if missing_sections:
            section_output.insert(tk.END, "\n‚ùå MISSING SECTIONS ({}):\n".format(len(missing_sections)))
            section_output.insert(tk.END, "="*50 + "\n")
            for section in missing_sections:
                section_output.insert(tk.END, f"‚Ä¢ {section}\n")
        
        if not missing_sections:
            section_output.insert(tk.END, "\nüéâ All sections detected! Your resume is well-structured.")
        
        section_output.config(state="disabled")
        
        # Calculate score
        score = calculate_resume_score(raw_text, detected_sections, passive_sentences, weak_phrases)
        
        # Update score display
        if score >= 80:
            score_color = "#198754"  # Green
            score_emoji = "üåü"
            score_rating = "Excellent"
        elif score >= 60:
            score_color = "#ffc107"  # Yellow
            score_emoji = "üìà"
            score_rating = "Good"
        else:
            score_color = "#dc3545"  # Red
            score_emoji = "üìâ"
            score_rating = "Needs Improvement"
        
        score_label.config(text=f"{score_emoji} {score_rating}: {score}/100", fg=score_color)
        
        # Generate and display suggestions
        suggestions = get_improvement_suggestions(raw_text, detected_sections, passive_sentences, weak_phrases)
        suggestions_output.config(state="normal")
        suggestions_output.delete(1.0, tk.END)
        
        if suggestions:
            suggestions_output.insert(tk.END, "üí° IMPROVEMENT SUGGESTIONS:\n")
            suggestions_output.insert(tk.END, "="*60 + "\n\n")
            for i, suggestion in enumerate(suggestions, 1):
                suggestions_output.insert(tk.END, f"{i}. {suggestion}\n\n")
        else:
            suggestions_output.insert(tk.END, "üéâ CONGRATULATIONS!\n")
            suggestions_output.insert(tk.END, "="*60 + "\n\n")
            suggestions_output.insert(tk.END, "Your resume looks excellent! No major improvements needed.")
        
        suggestions_output.config(state="disabled")
        
        status_label.config(text="‚úÖ Analysis completed successfully!", fg="#198754")
        
    except Exception as e:
        messagebox.showerror("Analysis Error", f"An error occurred during analysis: {str(e)}")
        status_label.config(text="‚ùå Analysis failed", fg="#dc3545")

def clear_all():
    """Clear all text areas and reset the interface"""
    text_area.delete(1.0, tk.END)
    correction_output.config(state="normal")
    correction_output.delete(1.0, tk.END)
    correction_output.config(state="disabled")
    
    passive_output.config(state="normal")
    passive_output.delete(1.0, tk.END)
    passive_output.config(state="disabled")
    
    weak_output.config(state="normal")
    weak_output.delete(1.0, tk.END)
    weak_output.config(state="disabled")
    
    section_output.config(state="normal")
    section_output.delete(1.0, tk.END)
    section_output.config(state="disabled")
    
    suggestions_output.config(state="normal")
    suggestions_output.delete(1.0, tk.END)
    suggestions_output.config(state="disabled")
    
    score_label.config(text="Resume Score: Not analyzed", fg="#6c757d")
    status_label.config(text="‚úÖ Ready to analyze new resume", fg="#198754")

# ---------------- UI Layout ----------------

root = tk.Tk()
root.title("üéì AI Resume Analyzer - Professional Edition")
root.geometry("1400x900")
root.config(bg="#f8f9fa")
root.state('zoomed')  # Maximize window on Windows

# Configure styles
style = ttk.Style()
style.theme_use("clam")

# Header
header_frame = tk.Frame(root, bg="#0d6efd", height=100)
header_frame.pack(fill="x")
header_frame.pack_propagate(False)

title_label = tk.Label(header_frame, text="üéì AI Resume Analyzer", 
                      font=("Arial", 28, "bold"), fg="white", bg="#0d6efd")
title_label.pack(pady=(15, 5))

subtitle_label = tk.Label(header_frame, text="Professional NLP-powered resume analysis with grammar correction and improvement suggestions", 
                         font=("Arial", 12), fg="white", bg="#0d6efd")
subtitle_label.pack()

# Control panel
control_frame = tk.Frame(root, bg="#f8f9fa", height=60)
control_frame.pack(fill="x", padx=20, pady=10)
control_frame.pack_propagate(False)

upload_button = tk.Button(control_frame, text="üìÅ Upload Resume (PDF)", 
                         command=upload_file, state="disabled",
                         font=("Arial", 12, "bold"), bg="#0d6efd", fg="white",
                         padx=20, pady=8, relief="flat")
upload_button.pack(side="left", padx=(0, 20))

clear_button = tk.Button(control_frame, text="üóëÔ∏è Clear All", 
                        command=clear_all,
                        font=("Arial", 12), bg="#dc3545", fg="white",
                        padx=20, pady=8, relief="flat")
clear_button.pack(side="left", padx=(0, 20))

status_label = tk.Label(control_frame, text="üîÑ Initializing models...", 
                       font=("Arial", 12, "bold"), fg="#fd7e14", bg="#f8f9fa")
status_label.pack(side="left")

# Main content area with notebook (tabs)
notebook = ttk.Notebook(root)
notebook.pack(fill="both", expand=True, padx=20, pady=10)

# Tab 1: Text Content
text_frame = tk.Frame(notebook, bg="#f8f9fa")
notebook.add(text_frame, text="üìù Resume Content")

# Create paned window for text content
text_paned = tk.PanedWindow(text_frame, orient="horizontal", sashrelief="raised")
text_paned.pack(fill="both", expand=True, padx=10, pady=10)

# Original text panel
original_frame = tk.Frame(text_paned, bg="white", relief="raised", bd=2)
text_paned.add(original_frame, width=600)

tk.Label(original_frame, text="üìù Original Resume Text", 
         font=("Arial", 14, "bold"), bg="white", fg="#0d6efd").pack(pady=(10, 5))

text_area = scrolledtext.ScrolledText(original_frame, wrap=tk.WORD, 
                                     font=("Consolas", 11), bg="#f8f9fa", 
                                     relief="sunken", bd=1)
text_area.pack(padx=10, pady=(0, 10), fill="both", expand=True)

# Grammar corrected text panel
corrected_frame = tk.Frame(text_paned, bg="white", relief="raised", bd=2)
text_paned.add(corrected_frame, width=600)

tk.Label(corrected_frame, text="‚úÖ Grammar Corrected Version", 
         font=("Arial", 14, "bold"), bg="white", fg="#198754").pack(pady=(10, 5))

correction_output = scrolledtext.ScrolledText(corrected_frame, wrap=tk.WORD, 
                                            state="disabled", font=("Consolas", 11), 
                                            bg="#f0fff0", relief="sunken", bd=1)
correction_output.pack(padx=10, pady=(0, 10), fill="both", expand=True)

# Tab 2: Analysis Results
analysis_frame = tk.Frame(notebook, bg="#f8f9fa")
notebook.add(analysis_frame, text="üìä Analysis Results")

# Score display at top
score_frame = tk.Frame(analysis_frame, bg="#f8f9fa", height=80)
score_frame.pack(fill="x", pady=(10, 0))
score_frame.pack_propagate(False)

score_label = tk.Label(score_frame, text="Resume Score: Not analyzed", 
                      font=("Arial", 24, "bold"), bg="#f8f9fa", fg="#6c757d")
score_label.pack(pady=20)

# Create grid for analysis results
analysis_grid = tk.Frame(analysis_frame, bg="#f8f9fa")
analysis_grid.pack(fill="both", expand=True, padx=10, pady=10)

# Configure grid weights
analysis_grid.grid_rowconfigure(0, weight=1)
analysis_grid.grid_columnconfigure(0, weight=1)
analysis_grid.grid_columnconfigure(1, weight=1)

# Left column
left_analysis = tk.Frame(analysis_grid, bg="white", relief="raised", bd=2)
left_analysis.grid(row=0, column=0, sticky="nsew", padx=(0, 5))

# Passive voice
tk.Label(left_analysis, text="üé≠ Passive Voice Detection", 
         font=("Arial", 14, "bold"), bg="white", fg="#fd7e14").pack(pady=(15, 5))

passive_output = scrolledtext.ScrolledText(left_analysis, height=10, state="disabled", 
                                          wrap=tk.WORD, font=("Consolas", 10), 
                                          bg="#fff3cd", relief="sunken", bd=1)
passive_output.pack(padx=15, pady=(0, 15), fill="both", expand=True)

# Weak phrases
tk.Label(left_analysis, text="üí§ Weak/Vague Phrases", 
         font=("Arial", 14, "bold"), bg="white", fg="#dc3545").pack(pady=(0, 5))

weak_output = scrolledtext.ScrolledText(left_analysis, height=8, state="disabled", 
                                       wrap=tk.WORD, font=("Consolas", 10), 
                                       bg="#f8d7da", relief="sunken", bd=1)
weak_output.pack(padx=15, pady=(0, 15), fill="both", expand=True)

# Right column
right_analysis = tk.Frame(analysis_grid, bg="white", relief="raised", bd=2)
right_analysis.grid(row=0, column=1, sticky="nsew", padx=(5, 0))

# Section analysis
tk.Label(right_analysis, text="üìÇ Section Analysis", 
         font=("Arial", 14, "bold"), bg="white", fg="#0d6efd").pack(pady=(15, 5))

section_output = scrolledtext.ScrolledText(right_analysis, height=10, state="disabled", 
                                          wrap=tk.WORD, font=("Consolas", 10), 
                                          bg="#e7f3ff", relief="sunken", bd=1)
section_output.pack(padx=15, pady=(0, 15), fill="both", expand=True)

# Improvement suggestions
tk.Label(right_analysis, text="üí° Improvement Suggestions", 
         font=("Arial", 14, "bold"), bg="white", fg="#198754").pack(pady=(0, 5))

suggestions_output = scrolledtext.ScrolledText(right_analysis, height=8, state="disabled", 
                                              wrap=tk.WORD, font=("Arial", 11), 
                                              bg="#d4edda", relief="sunken", bd=1)
suggestions_output.pack(padx=15, pady=(0, 15), fill="both", expand=True)

# Footer
footer_frame = tk.Frame(root, bg="#e9ecef", height=40)
footer_frame.pack(fill="x", side="bottom")
footer_frame.pack_propagate(False)

tk.Label(footer_frame, text="¬© 2025 AI Resume Analyzer | Final Year Project | Built with ‚ù§Ô∏è using Transformers, spaCy & Tkinter", 
         font=("Arial", 10), bg="#e9ecef", fg="#6c757d").pack(pady=12)

# Start model loading
start_model_loading()

# Start the GUI
root.mainloop()

Device set to use cpu
Token indices sequence length is longer than the specified maximum sequence length for this model (751 > 512). Running this sequence through the model will result in indexing errors
