In [16]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from rapidfuzz import fuzz

# Quick sanity check: score one user description against one job description
# with an embedding-based similarity and a character-level fuzzy score.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

user_desc = "Python developer with web backend experience"
job_desc = "Backend engineer skilled in Django and Python"

# Embedding similarity: cosine similarity between the two sentence embeddings.
user_vec = embedding_model.encode(user_desc)
job_vec = embedding_model.encode(job_desc)
# cosine_similarity works on 2-D inputs, so each vector is wrapped in a list
# and the single entry of the resulting 1x1 matrix is extracted.
similarity = cosine_similarity([user_vec], [job_vec])[0][0]

# Fuzzy match score between the raw description strings.
fuzzy_score = fuzz.partial_ratio(user_desc, job_desc)

# Two decimals: a single decimal (":.1f") would round 0.76 to 0.8 and hide
# most of the difference between candidate scores.
print(f"Embedding similarity: {similarity:.2f}")
print(f"Fuzzy score: {fuzzy_score}")


Embedding similarity: 0.76
Fuzzy score: 37.93103448275862


In [5]:
# (Temporary) Import necessary libraries
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from rapidfuzz import fuzz

# STEP 1: Load the sentence-embedding model.
# The model is downloaded the first time it is used (about 80MB).
model = SentenceTransformer("all-MiniLM-L6-v2")

# STEP 2: Example user profile that the jobs are matched against.
user_profile = dict(
    name="Jeric Natividad",
    skills=["Python", "Data Analysis", "Machine Learning", "Communication"],
    education="Bachelor of Science in Computer Science",
    experience="Junior Developer at Tech Startup",
    languages=["English", "Tagalog"],
)

# STEP 3: Example job postings, one row per job.
# Each tuple is (job_title, qualifications, language_requirement).
job_data = pd.DataFrame(
    data=[
        (
            "Data Analyst",
            "Strong in Python, data visualization, Excel, and analytics. SQL experience preferred.",
            "English required",
        ),
        (
            "Software Engineer",
            "Experience with full-stack web development using JavaScript, React, and Node.js. Python is a plus.",
            "English required",
        ),
        (
            "AI Research Assistant",
            "Knowledge of machine learning, deep learning, and research writing. Strong in Python and data science.",
            "English beneficial but not mandatory",
        ),
        (
            "Junior QA Tester",
            "Attention to detail, basic scripting knowledge, and experience with testing tools.",
            "English beneficial",
        ),
    ],
    columns=["job_title", "qualifications", "language_requirement"],
)

# STEP 4: Combine the user's skills, education and experience into a single
# space-separated text and embed it.
user_text = " ".join(
    [*user_profile["skills"], user_profile["education"], user_profile["experience"]]
)
user_embedding = model.encode(user_text)

# STEP 5: Compute job embeddings.
# All qualification texts are encoded in one batched call instead of one
# model.encode call per row; the resulting 2-D array is split back into one
# vector per job and aligned on the frame's index.
job_embeddings = model.encode(job_data["qualifications"].tolist())
job_data["embedding"] = pd.Series(list(job_embeddings), index=job_data.index)

# (Temporary) STEP 6: Calculate semantic similarity
def compute_semantic_score(user_emb, job_emb):
    """Return the cosine similarity of a user embedding and a job embedding.

    Args:
        user_emb: 1-D embedding vector for the user.
        job_emb: 1-D embedding vector for the job.

    Returns:
        The similarity as a plain Python float.
    """
    # cosine_similarity works on 2-D inputs: wrap each vector as a one-row
    # matrix and read back the only cell of the 1x1 result.
    similarity_matrix = cosine_similarity([user_emb], [job_emb])
    only_cell = similarity_matrix[0][0]
    return float(only_cell)

# Score every job embedding against the single user embedding.
job_data["semantic_score"] = [
    compute_semantic_score(user_embedding, job_emb)
    for job_emb in job_data["embedding"]
]

# (Temporary) STEP 7: Apply fuzzy match scoring for language preference
def fuzzy_language_bonus(lang_requirement, user_languages):
    """Score how well the user's languages satisfy a job's language requirement.

    Args:
        lang_requirement: Free-text requirement such as "English required".
            A non-string value (e.g. None or a pandas NaN for a missing
            entry) is treated as "no requirement text" and scores 0.0.
        user_languages: Iterable of language names the user speaks.

    Returns:
        1.0 if any user language fuzzy-matches the requirement text
        (partial_ratio above 90), even when the text also says "beneficial";
        0.5 if no language matches but the text contains "beneficial";
        0.0 otherwise.
    """
    # Missing values in a DataFrame column arrive as None/NaN, which have no
    # .lower(); score them as no match instead of raising.
    if not isinstance(lang_requirement, str):
        return 0.0

    # Lower-case once; the requirement does not change across the loop.
    requirement = lang_requirement.lower()

    # Full score as soon as one of the user's languages appears in the text.
    if any(fuzz.partial_ratio(lang.lower(), requirement) > 90 for lang in user_languages):
        return 1.0
    # Partial score when the language is only 'beneficial' and the user lacks it.
    if "beneficial" in requirement:
        return 0.5
    return 0.0

# Score each job's language requirement against the user's languages.
job_data["language_score"] = [
    fuzzy_language_bonus(requirement, user_profile["languages"])
    for requirement in job_data["language_requirement"]
]

# STEP 8: Weighted final score.
# The weights are named so the blend can be tuned in one place.
SEMANTIC_WEIGHT = 0.8
LANGUAGE_WEIGHT = 0.2
job_data["final_score"] = (
    job_data["semantic_score"] * SEMANTIC_WEIGHT
    + job_data["language_score"] * LANGUAGE_WEIGHT
)

# STEP 9: Sort results from best to worst match
job_data = job_data.sort_values(by="final_score", ascending=False)

# STEP 10: Display results, one block per job in the frame's current order.
# The emoji literals below were mis-encoded (UTF-8 bytes read as Mac Roman);
# they are restored to the intended trophy and blue-diamond characters.
print("🏆 Job Match Results for:", user_profile["name"])
for _, row in job_data.iterrows():
    print(f"\n🔹 {row['job_title']}")
    print(f"Semantic Score: {row['semantic_score']:.3f}")
    print(f"Language Bonus: {row['language_score']}")
    print(f"Final Match Score: {row['final_score']:.3f}")


🏆 Job Match Results for: Jeric Natividad

🔹 AI Research Assistant
Semantic Score: 0.488
Language Bonus: 1.0
Final Match Score: 0.590

🔹 Data Analyst
Semantic Score: 0.456
Language Bonus: 1.0
Final Match Score: 0.565

🔹 Junior QA Tester
Semantic Score: 0.249
Language Bonus: 1.0
Final Match Score: 0.400

🔹 Software Engineer
Semantic Score: 0.193
Language Bonus: 1.0
Final Match Score: 0.354
