<a href="https://colab.research.google.com/github/ankit071105/ATS-Score/blob/main/AtsScore.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install -q scikit-learn pandas nltk python-docx pdfminer.six plotly gradio

In [3]:
import base64
import html
import io
import os
import re
import string
import tempfile
from collections import defaultdict

import gradio as gr
import nltk
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from docx import Document
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from pdfminer.high_level import extract_text
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.feature_extraction.text import TfidfVectorizer

In [4]:
# Fetch the NLTK resources that ATSModel.preprocess_text relies on:
# the English stop-word list and the WordNet data used by WordNetLemmatizer.
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [5]:
# 1.  ATS Scoring Model
class ATSModel:
    """Demo ATS scorer: TF-IDF text features fed to a gradient-boosting regressor.

    The model is fitted at construction time on four hard-coded sample
    resumes, so the predicted scores are illustrative only.  Training
    documents and documents scored later both go through
    :meth:`preprocess_text`, so the vectorizer sees the same token forms in
    both phases.

    Constructing an instance reads the NLTK stop-word corpus and lemmatizes
    the sample resumes, so the ``stopwords`` and ``wordnet`` NLTK resources
    must be available beforehand.
    """

    # Section headings reported as ``has_<name>`` flags by extract_features.
    SECTIONS = ('experience', 'education', 'skills', 'projects', 'summary')

    # Keyword groups; extract_features reports how many terms of each group occur.
    KEYWORDS = {
        'technical': ['python', 'java', 'sql', 'machine learning', 'algorithms'],
        'soft': ['communication', 'teamwork', 'leadership', 'problem solving'],
        'education': ['degree', 'university', 'gpa', 'coursework']
    }

    # Matches any single ASCII punctuation character.
    _PUNCTUATION = re.compile(f'[{re.escape(string.punctuation)}]')

    def __init__(self):
        # Initialize with sample data
        self.sample_data = pd.DataFrame({
            'text': [
                "Experienced Python developer with 5+ years building web applications. Skills: Python, Django, SQL. Education: BS Computer Science.",
                "Marketing manager with 7 years experience. Led teams of 10+ people. Skills: SEO, PPC, Social Media. Education: MBA Marketing.",
                "Recent computer science graduate. Projects: Machine learning models. Skills: Python, TensorFlow. Education: BS Computer Science.",
                "Graphic designer with portfolio. Skills: Photoshop, Illustrator. Education: BFA Design."
            ],
            'score': [88, 85, 76, 82]
        })
        self.vectorizer = TfidfVectorizer(max_features=200)
        # A fixed seed keeps the fitted model reproducible across runs.
        self.model = GradientBoostingRegressor(
            n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42
        )
        # Built once and reused: reloading the stop-word list for every token
        # dominates preprocessing time on real resumes.
        self._lemmatizer = WordNetLemmatizer()
        self._stop_words = frozenset(stopwords.words('english'))
        self._train_model()

    def _train_model(self):
        """Fit the vectorizer and the regressor on the built-in sample data."""
        # Preprocess exactly as predict_score does; fitting on raw text would
        # give the vectorizer a vocabulary of un-lemmatized tokens that the
        # preprocessed inference text can never match.
        corpus = [self.preprocess_text(doc) for doc in self.sample_data['text']]
        X = self.vectorizer.fit_transform(corpus)
        y = self.sample_data['score']
        self.model.fit(X, y)

    def preprocess_text(self, text):
        """Normalize text: lowercase, strip punctuation, drop stop words, lemmatize.

        Args:
            text: Raw resume text.

        Returns:
            The remaining lemmatized tokens joined by single spaces.
        """
        text = text.lower()
        # Replace punctuation with a space (not nothing) so that
        # "python,django" stays two tokens instead of fusing into one.
        text = self._PUNCTUATION.sub(' ', text)
        return ' '.join(
            self._lemmatizer.lemmatize(word)
            for word in text.split()
            if word not in self._stop_words
        )

    def extract_features(self, text):
        """Compute the rule-based resume features used for feedback and charts.

        Args:
            text: Raw resume text.

        Returns:
            dict with one ``has_<section>`` flag (0 or 1) per entry of
            ``SECTIONS``, one hit count per group in ``KEYWORDS`` and
            ``word_count``, the number of tokens left after preprocessing
            (stop words are therefore not counted).
        """
        processed = self.preprocess_text(text)

        def mentions(term):
            # Put the search term through the same normalization as the
            # resume; otherwise plural terms such as "skills" are compared
            # against their lemmatized form "skill" and never match.
            normalized = self.preprocess_text(term)
            # An empty normalized term would be a substring of everything.
            return bool(normalized) and normalized in processed

        # Check for important sections
        section_presence = {f"has_{sec}": int(mentions(sec)) for sec in self.SECTIONS}
        keyword_counts = {
            group: sum(1 for kw in terms if mentions(kw))
            for group, terms in self.KEYWORDS.items()
        }
        word_count = len(processed.split())

        return {
            **section_presence,
            **keyword_counts,
            'word_count': word_count
        }

    def predict_score(self, text):
        """Predict an ATS score for raw resume text.

        Args:
            text: Raw resume text.

        Returns:
            The model prediction as a float, clipped to the range 0-100.
        """
        processed = self.preprocess_text(text)
        vec = self.vectorizer.transform([processed])
        return float(np.clip(self.model.predict(vec)[0], 0, 100))
# Module-level scorer used by analyze_resume; constructing it trains the
# model on the sample data built into ATSModel.__init__.
ats_model = ATSModel()

In [6]:
# 2. Feedback Generator
def generate_feedback(score, features):
    """Build the list of recommendation strings for a scored resume.

    Args:
        score: Numeric ATS score; 85 and above is the top tier, 70 and above
            the middle tier, anything else the lowest tier.
        features: Mapping that contains ``has_*`` flags plus the
            ``technical``, ``soft`` and ``word_count`` entries.

    Returns:
        A list of strings: one verdict line, then any warnings that apply,
        then two general tips that are always appended.
    """
    # Verdict line, chosen by score tier.
    if score >= 85:
        verdict = "✅ Excellent! Your resume is well-optimized for ATS systems."
    elif score >= 70:
        verdict = "🟡 Good. Your resume could use some improvements to score higher."
    else:
        verdict = "🔴 Needs work. Significant improvements needed for better ATS performance."
    messages = [verdict]

    # Every falsy ``has_*`` flag names a section that was not detected.
    absent = []
    for key, present in features.items():
        if key.startswith('has_') and not present:
            absent.append(key.replace('has_', ''))
    if absent:
        messages.append(f"⚠️ Missing sections: {', '.join(absent)}")

    # Keyword coverage warnings.
    if features['technical'] < 3:
        messages.append("⚠️ Add more technical skills relevant to the job")
    if features['soft'] < 2:
        messages.append("⚠️ Include more soft skills like communication and teamwork")

    # Length warning: at most one of the two can apply.
    length = features['word_count']
    if length < 300:
        messages.append("⚠️ Resume is too short - add more details about your experience")
    elif length > 800:
        messages.append("⚠️ Resume is too long - focus on most relevant information")

    # General tips, appended unconditionally.
    messages.extend([
        "💡 Tip: Use bullet points to describe your experience",
        "💡 Tip: Quantify achievements with numbers when possible",
    ])

    return messages

In [7]:
# 3. Visualization Components
def create_visualizations(score, features, good_threshold=70, excellent_threshold=85):
    """Build the three Plotly figures shown in the UI.

    Args:
        score: ATS score drawn on the gauge (the axis spans 0-100).
        features: Mapping with the ``technical``, ``soft`` and ``education``
            counts and the five ``has_*`` section flags.
        good_threshold: Score where the gauge turns from red to yellow.
        excellent_threshold: Score where the gauge turns from yellow to green.
            The defaults mirror the score tiers used by generate_feedback, so
            the gauge colour and the textual verdict agree.

    Returns:
        Tuple ``(fig_gauge, fig_keywords, fig_sections)``.
    """
    fig_gauge = go.Figure(go.Indicator(
        mode="gauge+number",
        value=score,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "ATS Score"},
        gauge={
            'axis': {'range': [0, 100]},
            'steps': [
                {'range': [0, good_threshold], 'color': "red"},
                {'range': [good_threshold, excellent_threshold], 'color': "yellow"},
                {'range': [excellent_threshold, 100], 'color': "green"}],
            # Black marker drawn at the score itself.
            'threshold': {
                'line': {'color': "black", 'width': 4},
                'thickness': 0.75,
                'value': score}
        }))

    # Bar chart: number of keyword hits per category.
    keyword_data = {'Technical': features['technical'],
                   'Soft Skills': features['soft'],
                   'Education': features['education']}
    fig_keywords = px.bar(
        x=list(keyword_data.keys()),
        y=list(keyword_data.values()),
        title="Keyword Coverage",
        labels={'x': 'Category', 'y': 'Count'}
    )

    # Bar chart: 1 for a detected section, 0 for a missing one.
    section_data = {'Experience': features['has_experience'],
                   'Education': features['has_education'],
                   'Skills': features['has_skills'],
                   'Projects': features['has_projects'],
                   'Summary': features['has_summary']}
    fig_sections = px.bar(
        x=list(section_data.keys()),
        y=list(section_data.values()),
        title="Section Presence",
        labels={'x': 'Section', 'y': 'Present (1) or Missing (0)'},
        range_y=[0, 1]
    )

    return fig_gauge, fig_keywords, fig_sections

In [8]:
# 4. Universal File Handler for All Gradio Versions
def _read_resume_text(name, path=None, payload=None):
    """Extract plain text from a resume given as a path or as raw bytes.

    Args:
        name: File name; its extension (compared case-insensitively) selects
            the parser: ``.pdf``, ``.docx``, anything else is read as UTF-8.
        path: Filesystem path of the document, or None.
        payload: Document content as bytes; used when ``path`` is None.

    Returns:
        The extracted text.
    """
    lowered = str(name).lower()
    if lowered.endswith('.pdf'):
        return extract_text(path if path is not None else io.BytesIO(payload))
    if lowered.endswith('.docx'):
        doc = Document(path if path is not None else io.BytesIO(payload))
        return '\n'.join(para.text for para in doc.paragraphs)
    if path is not None:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    return payload.decode('utf-8')


def handle_uploaded_file(file_obj):
    """Return the text of an uploaded resume, whatever form the upload takes.

    Args:
        file_obj: One of
            * a filesystem path (``str`` or ``os.PathLike``),
            * a file-like object with ``read()`` (its ``name`` attribute, if
              present, selects the parser),
            * a dict with ``'name'`` and ``'data'`` keys, where ``'data'`` is
              base64 content, optionally prefixed with a data-URL header
              ending in a comma.

    Returns:
        The extracted resume text.

    Raises:
        ValueError: If ``file_obj`` is None (nothing uploaded) or of an
            unsupported type.
    """
    if file_obj is None:
        # The upload component passes None when no file was selected.
        raise ValueError("No file uploaded")

    if isinstance(file_obj, (str, os.PathLike)):
        path = os.fspath(file_obj)
        return _read_resume_text(path, path=path)

    if hasattr(file_obj, 'read'):
        payload = file_obj.read()
        if isinstance(payload, str):
            # Stream opened in text mode; normalize to bytes for the parsers.
            payload = payload.encode('utf-8')
        # In-memory streams have no name; treat those as plain text.
        name = getattr(file_obj, 'name', '') or ''
        return _read_resume_text(name, payload=payload)

    if isinstance(file_obj, dict):
        # Keep only what follows the first comma (the data-URL header, if
        # any); a payload without a header is used as-is.
        encoded = file_obj['data'].split(',', 1)[-1]
        return _read_resume_text(file_obj['name'], payload=base64.b64decode(encoded))

    raise ValueError("Unsupported file object type")


In [9]:
# 5. Main Analysis Function
def analyze_resume(file_obj):
    """Run the full pipeline for one uploaded resume (the Gradio callback).

    Args:
        file_obj: Upload value handed over by the UI; passed unchanged to
            handle_uploaded_file.

    Returns:
        A 5-tuple ``(fig_gauge, fig_keywords, fig_sections, feedback_html,
        metrics_html)``.  If any step raises, the three figures are replaced
        by one empty figure titled "Error occurred", the fourth element is an
        HTML error box and the fifth an empty string, so the UI always
        receives five values.
    """
    try:
        resume_text = handle_uploaded_file(file_obj)
        if not resume_text or not resume_text.strip():
            # e.g. a PDF without extractable text; scoring an empty string
            # would present a meaningless number as a real result.
            raise ValueError("No readable text found in the uploaded file")
        score = ats_model.predict_score(resume_text)
        features = ats_model.extract_features(resume_text)
        feedback = generate_feedback(score, features)
        fig_gauge, fig_keywords, fig_sections = create_visualizations(score, features)
        feedback_html = "<h3>Recommendations</h3><ul>" + \
                       "".join([f"<li>{html.escape(item)}</li>" for item in feedback]) + "</ul>"

        metrics_html = f"""
        <h3>Resume Metrics</h3>
        <p><b>Word Count:</b> {features['word_count']} (300-800 recommended)</p>
        <p><b>Technical Keywords:</b> {features['technical']} found</p>
        <p><b>Soft Skills:</b> {features['soft']} found</p>
        <p><b>Missing Sections:</b> {sum(1 for k,v in features.items() if k.startswith('has_') and not v)}</p>
        """

        return fig_gauge, fig_keywords, fig_sections, feedback_html, metrics_html

    except Exception as e:
        # Deliberately broad: any failure is reported inside the UI.  The
        # message is escaped because exception text may contain "<...>"
        # fragments (object reprs, type names) that would otherwise be
        # interpreted as markup.
        error_msg = f"<div style='color:red;padding:20px;border:1px solid red;border-radius:5px;'>Error processing file: {html.escape(str(e))}</div>"
        empty_fig = go.Figure()
        empty_fig.update_layout(title="Error occurred", showlegend=False)
        return empty_fig, empty_fig, empty_fig, error_msg, ""

In [10]:
# 6. Gradio Interface with Robust Error Handling
# Layout: upload column and score gauge on top, the two bar charts in the
# middle, recommendations and metrics at the bottom.
with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as interface:
    gr.Markdown("""
    # 🚀 AI-Powered ATS Resume Analyzer
    Upload your resume to get an instant ATS score and personalized improvement recommendations
    """)

    with gr.Row():
        with gr.Column():
            # The label lists every type allowed by file_types below.
            file_input = gr.File(
                label="Upload Resume (PDF, DOCX or TXT)",
                file_types=[".pdf", ".docx", ".txt"],
                type="filepath"
            )
            submit_btn = gr.Button("Analyze Resume", variant="primary")

            # Deployment options
            with gr.Accordion("Deployment Options", open=False):
                gr.Markdown("""
                ### For permanent hosting:
                [![Hugging Face Spaces](https://img.shields.io/badge/Deploy%20to-Hugging%20Face%20Spaces-blue)](https://huggingface.co/spaces)
                """)
        with gr.Column():
            gauge_plot = gr.Plot(label="ATS Score")

    with gr.Row():
        keywords_plot = gr.Plot()
        sections_plot = gr.Plot()

    with gr.Row():
        feedback_output = gr.HTML()
        metrics_output = gr.HTML()

    # The outputs are listed in the order of the 5-tuple returned by analyze_resume.
    submit_btn.click(
        analyze_resume,
        inputs=file_input,
        outputs=[gauge_plot, keywords_plot, sections_plot, feedback_output, metrics_output]
    )


In [None]:
# Launch the app
try:
    # First attempt: request a public share link.
    interface.launch(debug=True, share=True)
except Exception as launch_error:
    print(f"Error launching Gradio interface: {launch_error!s}")
    print("Trying alternative launch method...")
    # Second attempt: same settings without the share link.
    interface.launch(debug=True, share=False)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://d3b0bf9816c9225e2d.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
