In [None]:
!pip install pdfplumber

Collecting pdfplumber
  Using cached pdfplumber-0.11.6-py3-none-any.whl.metadata (42 kB)
Collecting pdfminer.six==20250327 (from pdfplumber)
  Downloading pdfminer_six-20250327-py3-none-any.whl.metadata (4.1 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.6-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.2/60.2 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer_six-20250327-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m105.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pypdfium2-4.30.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [3

In [None]:
import pandas as pd
import pdfplumber
import spacy
import nltk
import sqlite3
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from google.colab import files

# Download NLP models
# NOTE(review): nothing in the visible cells calls into nltk, so this download
# may be unnecessary — confirm before removing.
nltk.download("punkt")
# spaCy pipeline whose part-of-speech tags are read by extract_key_info.
nlp = spacy.load("en_core_web_sm")

# Load Job Descriptions
jd_file = "job_description.csv"  # Ensure this file is uploaded to Colab
jd_df = pd.read_csv(jd_file, encoding="ISO-8859-1")

# Fail fast if the CSV lacks the columns the matching cells read, instead of
# hitting a bare KeyError later, after the resume has already been uploaded.
_missing_jd_columns = {"Job Title", "Job Description"} - set(jd_df.columns)
if _missing_jd_columns:
    raise ValueError(
        f"{jd_file} is missing required column(s): {sorted(_missing_jd_columns)}"
    )

# Pull the text layer out of a PDF resume
def extract_text_from_pdf(pdf_path):
    """Return the combined text of every page in the PDF at ``pdf_path``.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are skipped. The remaining page texts are separated by a newline,
    and leading/trailing whitespace is stripped from the combined result.
    """
    with pdfplumber.open(pdf_path) as document:
        # Extract while the file is still open; filtering happens afterwards.
        page_texts = [page.extract_text() for page in document.pages]
    return "\n".join(chunk for chunk in page_texts if chunk).strip()

# Upload Resume File
print("Please upload the resume (PDF format):")
uploaded = files.upload()

# Guard against an empty result (e.g. the upload dialog was dismissed); indexing
# into it would otherwise fail with an unhelpful IndexError.
if not uploaded:
    raise ValueError("No file was uploaded; re-run this cell and select a PDF resume.")

# Get the uploaded file name (only the first file is used if several were selected)
resume_file = next(iter(uploaded))

try:
    resume_text = extract_text_from_pdf(resume_file)
except Exception as e:
    print(f"Error reading resume file: {e}")
    # Re-raise rather than calling exit(): in a notebook kernel exit() requests a
    # kernel shutdown (losing all state) instead of simply aborting this cell.
    raise

# A PDF with no text layer yields an empty string, which would score 0 against
# every job and make the reported "best match" meaningless.
if not resume_text:
    raise ValueError(
        f"No extractable text found in {resume_file!r} (is it a scanned/image-only PDF?)"
    )

# Extracting key information from JD
def extract_key_info(jd_text):
    """Return the distinct noun / proper-noun tokens of ``jd_text`` as one string.

    The text is run through the module-level spaCy pipeline ``nlp``; tokens
    tagged ``NOUN`` or ``PROPN`` are kept, de-duplicated (case-sensitively),
    and joined with single spaces in order of first appearance, so the result
    is identical from run to run.

    Args:
        jd_text: Job-description text. Anything that is not a non-blank string
            (``None``, a NaN float from an empty CSV cell, ``""``) is treated
            as having no content.

    Returns:
        A space-separated string of unique tokens, or ``""`` when there is
        nothing to extract.
    """
    # Blank CSV cells arrive as float NaN, which spaCy cannot process.
    if not isinstance(jd_text, str) or not jd_text.strip():
        return ""
    doc = nlp(jd_text)
    skills = [token.text for token in doc if token.pos_ in ("NOUN", "PROPN")]
    # dict.fromkeys de-duplicates while preserving insertion order; a set would
    # give an ordering that can change between interpreter runs.
    return " ".join(dict.fromkeys(skills))

# Preprocess Job Descriptions
# Blank cells load as NaN (a float) which spaCy cannot parse; treat them as empty text.
jd_df["Processed_JD"] = (
    jd_df["Job Description"].fillna("").astype(str).apply(extract_key_info)
)

# Matching JD with Resume using TF-IDF and Cosine Similarity
# The resume is appended as the last document so both sides share one vocabulary.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(jd_df["Processed_JD"].tolist() + [resume_text])

# Compute similarity: last row (the resume) against every job-description row
cosine_sim = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])
jd_df["Match_Score"] = cosine_sim[0] * 100  # Convert to percentage

# Find the job with the highest match score
best_match = jd_df.loc[jd_df["Match_Score"].idxmax()]

# Save results to SQLite
# try/finally guarantees the connection is closed even if to_sql raises.
conn = sqlite3.connect("recruitment.db")
try:
    jd_df.to_sql("candidates", conn, if_exists="replace", index=False)
finally:
    conn.close()

# Print Full Matching Results
print("\nFull Matching Results:")
print(jd_df[["Job Title", "Match_Score"]].sort_values(by="Match_Score", ascending=False))

# Print Best Matched Job Title
print("\nBest Matched Job:")
print(f"Job Title: {best_match['Job Title']}")
print(f"Match Score: {best_match['Match_Score']:.2f}%")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Please upload the resume (PDF format):




Saving Devendra Singh Resume.pdf to Devendra Singh Resume.pdf





Full Matching Results:
                        Job Title  Match_Score
7            Full Stack Developer    13.353280
12             Software Architect     5.456560
6                 DevOps Engineer     5.427799
19                 UX/UI Designer     5.081528
0               Software Engineer     5.047085
10         Database Administrator     4.774900
14             IT Project Manager     4.767487
17      Embedded Systems Engineer     4.529285
3                  Cloud Engineer     4.239745
2                 Product Manager     4.147709
16              Robotics Engineer     3.890741
13           Blockchain Developer     2.890058
4           Cybersecurity Analyst     2.784726
5       Machine Learning Engineer     2.680463
18     Quality Assurance Engineer     2.603041
8               Big Data Engineer     2.333359
9                   AI Researcher     2.182564
1                  Data Scientist     1.814613
11               Network Engineer     1.433173
15  Business Intelligence Analyst   

SyntaxError: invalid syntax (<ipython-input-1-d93c8dd246e3>, line 1)