<a href="https://colab.research.google.com/github/darvesh-sd/Copy-of-TPSessions.ipynb/blob/main/CVMatcher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
# Install necessary libraries (run this first in Colab)
!pip install flask pyngrok pandas scikit-learn python-docx python-pptx
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m87.4 MB/s[0m eta [36m0:00:00[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Import necessary libraries
import os
from getpass import getpass

import pandas as pd
import spacy
from docx import Document  # For extracting text from Word files
from flask import Flask, request, render_template_string
from pptx import Presentation  # For extracting text from PowerPoint files
from pyngrok import ngrok
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Set up ngrok authentication.
# The authtoken is a credential, so it must never be stored in the notebook:
# read it from the NGROK_AUTHTOKEN environment variable, or prompt for it
# (getpass does not echo the value into the cell output).
# NOTE(review): any authtoken that was ever committed with this notebook should
# be revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")
ngrok.set_auth_token(ngrok_auth_token)

# Initialize Flask application
app = Flask(__name__)

# Load SpaCy language model for NLP
nlp = spacy.load("en_core_web_sm")

# Function to extract text from supported file types
def extract_text_from_file(file):
    """
    Extract text from an uploaded .txt, .docx, or .pptx file.

    Args:
        file: An uploaded file object exposing a ``filename`` attribute and a
            binary ``read()`` method (the upload route passes entries of
            ``request.files``).

    Returns:
        str: The extracted text. For .docx the paragraph texts, and for .pptx
        the texts of all shapes that have a ``text`` attribute, joined with
        newlines.

    Raises:
        ValueError: If the file has no name or its extension is not one of
            .txt, .docx, or .pptx.
        UnicodeDecodeError: If a .txt file is not valid UTF-8.
    """
    # A form field submitted without a file can carry an empty or missing
    # filename; treat that as an unsupported upload instead of crashing on
    # ``None.lower()``.
    filename = (file.filename or "").lower()

    if filename.endswith(".txt"):
        # "utf-8-sig" decodes plain UTF-8 and additionally drops a leading
        # byte-order mark, which would otherwise leak into the text.
        return file.read().decode("utf-8-sig")

    if filename.endswith(".docx"):
        # Extract text from Word document
        document = Document(file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    if filename.endswith(".pptx"):
        # Extract text from PowerPoint file; only shapes exposing ``text`` are used
        presentation = Presentation(file)
        return "\n".join(
            shape.text
            for slide in presentation.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        )

    raise ValueError("Unsupported file format. Please upload .txt, .docx, or .pptx files.")

# Function to extract skills from text
def extract_skills(text):
    """
    Collect candidate skill terms from a piece of text.

    The text is run through the SpaCy pipeline ``nlp``. Every token tagged as
    a noun or proper noun that is not a stop word is kept, lower-cased, in
    document order (duplicates included).
    """
    wanted_tags = ("NOUN", "PROPN")
    found = []
    for tok in nlp(text):
        # Skip anything that is not a noun / proper noun, and skip stop words.
        if tok.pos_ not in wanted_tags or tok.is_stop:
            continue
        found.append(tok.text.lower())
    return found

# Route for the home page to upload files
@app.route("/")
def home():
    """
    Serve the landing page.

    The page is a single form that posts two file inputs, "cv" and "job",
    to /upload as multipart/form-data.
    """
    # The page is static; it is kept inline so the notebook has no template files.
    upload_page = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Match it</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 20px; background-color: #f9f9f9; }
            h1 { color: #333; }
            form { background: #40E0D0; padding: 20px; border-radius: 8px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); max-width: 400px; margin: auto; }
            label { font-weight: bold; }
            button { background-color: #007BFF; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer; }
            button:hover { background-color: #0056b3; }
        </style>
    </head>
    <body>
        <h1>Upload Your CV and Job Description</h1>
        <form action="/upload" method="post" enctype="multipart/form-data">
            <label>Upload CV (.txt, .docx, .pptx):</label><br>
            <input type="file" name="cv" required><br><br>
            <label>Upload Job Description (.txt, .docx, .pptx):</label><br>
            <input type="file" name="job" required><br><br>
            <button type="submit">Upload</button>
        </form>
    </body>
    </html>
    """
    return render_template_string(upload_page)

# Route to process uploaded files and calculate similarity
@app.route("/upload", methods=["POST"])
def upload():
    """
    Process the uploaded CV and job description and render the match results.

    Reads the "cv" and "job" uploads, extracts skill terms from each, and
    renders (a) the cosine similarity of the two skill-count vectors as a
    percentage and (b) the skills that occur in the job description but not
    in the CV.

    Returns:
        The rendered results page on success. On failure, an
        ``(error page, status)`` tuple: 400 when the upload is missing or
        invalid (KeyError / ValueError, which includes unsupported formats
        and undecodable text), 500 for any other error.
    """
    try:
        # Retrieve the uploaded files
        cv_file = request.files["cv"]
        job_file = request.files["job"]

        # Extract text, then skills, from both files
        cv_skills = extract_skills(extract_text_from_file(cv_file))
        job_skills = extract_skills(extract_text_from_file(job_file))

        # Skills required by the job but absent from the CV. A set difference
        # avoids repeated list scans; sorting keeps the output order stable.
        missing_skills = sorted(set(job_skills) - set(cv_skills))

        # Calculate similarity score using Cosine Similarity
        if cv_skills and job_skills:
            # Count the SpaCy tokens as they are. The default CountVectorizer
            # tokenizer would re-split the text and silently drop
            # single-character skills such as "c" or "r".
            vectorizer = CountVectorizer(analyzer=lambda tokens: tokens)
            skill_vectors = vectorizer.fit_transform([cv_skills, job_skills])
            similarity_score = cosine_similarity(skill_vectors)[0, 1] * 100
        else:
            # Without skills on one side there is nothing to compare; fitting
            # on two empty documents would raise "empty vocabulary".
            similarity_score = 0.0

        # Values are passed as template variables (never formatted into the
        # template source) so that text taken from the uploads is HTML-escaped
        # and cannot be interpreted as Jinja syntax.
        return render_template_string(
            """
        <!DOCTYPE html>
        <html>
        <head><title>Results</title></head>
        <body>
            <h1>Matching Results</h1>
            <h2>Similarity Score: {{ similarity }}%</h2>
            <h2>Missing Skills:</h2>
            <p>{{ missing_skills|join(", ") if missing_skills else "None" }}</p>
            <a href="/">Go Back</a>
        </body>
        </html>
        """,
            similarity=f"{similarity_score:.2f}",
            missing_skills=missing_skills,
        )
    except Exception as e:
        # Bad or missing uploads are client errors; anything else is a server fault.
        status = 400 if isinstance(e, (KeyError, ValueError)) else 500
        app.logger.exception("Failed to process upload")
        # The message is rendered through the template engine so it is escaped.
        error_page = render_template_string(
            "<p>Error: {{ message }}</p><a href='/'>Go Back</a>",
            message=str(e),
        )
        return error_page, status

# Start ngrok to make the app accessible
PORT = 5000  # single source of truth for the tunnel target and the Flask port
public_url = ngrok.connect(PORT)
print(f"Your app is running at: {public_url}")

# Run the Flask application. app.run() blocks until the server is stopped;
# the tunnel is closed afterwards so that re-running this cell does not leave
# stale tunnels behind.
try:
    app.run(port=PORT)
finally:
    # ngrok.connect() returns a tunnel object; disconnect() takes its URL string.
    ngrok.disconnect(public_url.public_url)

Your app is running at: NgrokTunnel: "https://94a3-34-125-223-77.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [27/Jan/2025 10:31:55] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jan/2025 10:31:57] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [27/Jan/2025 10:32:10] "POST /upload HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jan/2025 10:32:18] "GET / HTTP/1.1" 200 -
