In [13]:
import pandas as pd
import numpy as np
import joblib
from sentence_transformers import SentenceTransformer

# Load pre-saved embeddings, candidate IDs, and KNN index
candidate_embeddings = np.load("embeddings.npy")
candidate_ids = np.load("candidate_ids.npy")
knn = joblib.load("knn_index.joblib")

# Load personnel data
personnel_df = pd.read_csv("candiate_data.csv")

# Initialize SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

def parse_roles(roles):
    """
    Parse roles, generate embeddings, and structure role requirements.
    """
    role_embeddings = []
    for role in roles:
        role_text = f"Role: {role['role']}, Skills: {', '.join(role.get('skills', []))}, " \
                    f"Location: {role.get('location', 'Anywhere')}, Experience: {role.get('experience', 'Not Specified')}."
        embedding = model.encode(role_text)
        role_embeddings.append({
            "role": role['role'],
            "embedding": embedding,
            "skills": role.get('skills', []),
            "location": role.get('location', 'Anywhere'),
            "experience": role.get('experience', None),
            "quantity": role.get('quantity', 1)
        })
    return role_embeddings

def calculate_match_score(candidate, role, similarity, weight_similarity=0.9, weight_experience=0.1):
    """
    Calculate a match score based on similarity and experience.
    """
    experience_years = candidate['experience_years']
    return weight_similarity * similarity + weight_experience * experience_years

def match_roles_to_candidates(roles, knn, candidate_embeddings, candidate_ids, top_k=50):
    """
    Match roles to candidates based on role embeddings and similarity to candidate embeddings.
    """
    matches = []
    for role in roles:
        print(f"Matching candidates for role: {role['role']}...")
        role_vector = np.array(role["embedding"]).reshape(1, -1)
        distances, indices = knn.kneighbors(role_vector, n_neighbors=top_k)

        candidates = []
        for i, idx in enumerate(indices[0]):
            candidate_id = candidate_ids[idx]
            similarity = 1 - distances[0][i]  # Cosine similarity: 1 - distance

            candidate_row = personnel_df[personnel_df['person_id'] == candidate_id].iloc[0]
            
            # Handle remote roles (no location restriction)
            role_location = role['location'].strip().lower()

            if role_location != "anywhere" and role_location != "remote" and role_location not in candidate_row['country'].strip().lower():
                # Skip this candidate if location doesn't match (and role isn't remote)
                continue 
            
            # Calculate match score
            score = calculate_match_score(candidate_row, role, similarity)
            candidates.append({
                "person_id": candidate_id,
                "name": candidate_row['name'],
                "similarity": similarity,
                "experience_years": candidate_row['experience_years'],
                "skills": candidate_row['aggregated_skill'],
                "score": score
            })

        # Sort and take top matches
        candidates = sorted(candidates, key=lambda x: -x['score']) 
        matches.append({
            "role": role['role'],
            "candidates": candidates[:role["quantity"]] 
        })

        print(f"Found {len(candidates)} matches for role: {role['role']}.")

    return matches

# Example input for roles
roles_input = [
    {"role": "AI Researcher", "skills": ["Python", "Deep Learning", "TensorFlow"], "location": "United States", "experience": "3+", "quantity": 1},
    {"role": "Database Administrator", "skills": ["SQL", "Oracle"], "location": "India", "experience": "4+", "quantity": 2},
    {"role": "Remote Software Engineer", "skills": ["Java", "Spring"], "location": "Remote", "experience": "5+", "quantity": 3},
    {"role": "Data Scientist", "skills": ["Python", "Machine Learning"], "location": "Canada", "experience": "4+", "quantity": 1}
]

# Parse roles and match candidates
parsed_roles = parse_roles(roles_input)
matches = match_roles_to_candidates(parsed_roles, knn, candidate_embeddings, candidate_ids)

# Display results
for match in matches:
    print(f"Role: {match['role']}")
    for candidate in match['candidates']:
        print(f"  - Candidate ID: {candidate['person_id']}, Score: {candidate['score']:.4f}")
        print(f"    Skills: {candidate['skills']}")
        print('\n')



Matching candidates for role: AI Researcher...
Found 44 matches for role: AI Researcher.
Matching candidates for role: Database Administrator...
Found 17 matches for role: Database Administrator.
Matching candidates for role: Remote Software Engineer...
Found 50 matches for role: Remote Software Engineer.
Matching candidates for role: Data Scientist...
Found 9 matches for role: Data Scientist.
Role: AI Researcher
  - Candidate ID: 17544, Score: 4.0270
    Skills: ['Developed novel software to score the traffic sources', 'Assisted in developing a visualization system', 'Worked towards developing a clustering algorithm for astronomical objects', 'Contributed in developing a Linux based multithreaded program debugger']['Python', 'express.js', 'async', 'node.js', 'Python', 'NumPy', 'SciPy', 'C', 'OpenMP']


Role: Database Administrator
  - Candidate ID: 2622, Score: 4.0540
    Skills: ['Oracle Database Administration', 'Oracle Goldengate', 'Data Guard', 'RMAN', 'Oracle Enterprise Manager',

In [11]:
import pandas as pd
import numpy as np
import joblib
from sentence_transformers import SentenceTransformer

# Load pre-saved embeddings, candidate IDs, and KNN index
candidate_embeddings = np.load("embeddings.npy")
candidate_ids = np.load("candidate_ids.npy")
knn = joblib.load("knn_index.joblib")

# Load personnel data
personnel_df = pd.read_csv("candiate_data.csv")

# Initialize SentenceTransformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

def parse_roles(roles):
    """
    Parse roles, generate embeddings, and structure role requirements.
    """
    role_embeddings = []
    for role in roles:
        role_text = f"Role: {role['role']}, Skills: {', '.join(role.get('skills', []))}, " \
                    f"Location: {role.get('location', 'Anywhere')}, Experience: {role.get('experience', 'Not Specified')}."
        embedding = model.encode(role_text)
        role_embeddings.append({
            "role": role['role'],
            "embedding": embedding,
            "skills": role.get('skills', []),
            "location": role.get('location', 'Anywhere'),
            "experience": role.get('experience', None),
            "quantity": role.get('quantity', 1)
        })
    return role_embeddings

def calculate_match_score(candidate, role, similarity, weight_similarity=0.9, weight_experience=0.1):
    """
    Calculate a match score based on similarity and experience.
    """
    experience_years = candidate['experience_years']
    return weight_similarity * similarity + weight_experience * experience_years

def match_roles_to_candidates(roles, knn, candidate_embeddings, candidate_ids, top_k=50):
    """
    Match roles to candidates based on role embeddings and similarity to candidate embeddings.
    """
    matches = []
    for role in roles:
        print(f"Matching candidates for role: {role['role']}...")
        role_vector = np.array(role["embedding"]).reshape(1, -1)
        distances, indices = knn.kneighbors(role_vector, n_neighbors=top_k)

        candidates = []
        for i, idx in enumerate(indices[0]):
            candidate_id = candidate_ids[idx]
            similarity = 1 - distances[0][i]  # Cosine similarity: 1 - distance

            candidate_row = personnel_df[personnel_df['person_id'] == candidate_id].iloc[0]
            
            # Handle remote roles (no location restriction)
            role_location = role['location'].strip().lower()

            if role_location != "anywhere" and role_location != "remote" and role_location not in candidate_row['country'].strip().lower():
                # Skip this candidate if location doesn't match (and role isn't remote)
                continue 
            
            # Calculate match score
            score = calculate_match_score(candidate_row, role, similarity)
            candidates.append({
                "person_id": candidate_id,
                "name": candidate_row['name'],
                "similarity": similarity,
                "experience_years": candidate_row['experience_years'],
                "skills": candidate_row['aggregated_skill'],
                "score": score
            })

        # Sort and take top matches
        candidates = sorted(candidates, key=lambda x: -x['score']) 
        matches.append({
            "role": role['role'],
            "candidates": candidates[:role["quantity"]] 
        })

        print(f"Found {len(candidates)} matches for role: {role['role']}.")

    return matches

# Example input for roles
roles_input =  [
    {
      "role": "Mobile App Developer",
      "skills": [
        "iOS Development",
        "Android Development",
        "Biometric Authentication"
      ],
      "location": "Remote",
      "experience": "3-5 years",
      "quantity": 10
    },
    {
      "role": "Backend Developer",
      "skills": [
        "Node.js",
        "Database Management",
        "API Development"
      ],
      "location": "Remote",
      "experience": "3-5 years",
      "quantity": 2
    },
    {
      "role": "UI/UX Designer",
      "skills": [
        "Mobile App Design",
        "User Experience",
        "Prototyping"
      ],
      "location": "Remote",
      "experience": "2-4 years",
      "quantity": 1
    },
    {
      "role": "QA Engineer",
      "skills": [
        "Mobile App Testing",
        "Security Testing",
        "Automated Testing"
      ],
      "location": "Remote",
      "experience": "2-4 years",
      "quantity": 2
    },
    {
      "role": "Security Specialist",
      "skills": [
        "App Security",
        "Encryption Protocols",
        "Penetration Testing"
      ],
      "location": "Remote",
      "experience": "5-7 years",
      "quantity": 1
    },
    {
      "role": "Project Manager",
      "skills": [
        "Agile Methodologies",
        "Team Coordination",
        "Banking Regulations"
      ],
      "location": "Remote",
      "experience": "5-7 years",
      "quantity": 1
    }
  ]


# Parse roles and match candidates
parsed_roles = parse_roles(roles_input)
matches = match_roles_to_candidates(parsed_roles, knn, candidate_embeddings, candidate_ids)

# Display results
for match in matches:
    print(f"Role: {match['role']}")
    for candidate in match['candidates']:
        print(f"  - Candidate ID: {candidate['person_id']}, Score: {candidate['score']:.4f}, Experience: {candidate['experience_years']} years")
        print(f"    Skills: {candidate['skills']}")
        print('\n')




Matching candidates for role: Mobile App Developer...
Found 50 matches for role: Mobile App Developer.
Matching candidates for role: Backend Developer...
Found 50 matches for role: Backend Developer.
Matching candidates for role: UI/UX Designer...
Found 50 matches for role: UI/UX Designer.
Matching candidates for role: QA Engineer...
Found 50 matches for role: QA Engineer.
Matching candidates for role: Security Specialist...
Found 50 matches for role: Security Specialist.
Matching candidates for role: Project Manager...
Found 50 matches for role: Project Manager.
Role: Mobile App Developer
  - Candidate ID: 3728, Score: 4.5235, Experience: 39.0 years
    Skills: ['Content management', 'Git', 'Html', 'Javascript', 'Bootstrap', 'Css', 'Sass', 'Content management system', 'jquery', 'Processor']['Interfaced directly with the client', 'Assisted with wireframing of designs', 'Wrote HTML and CSS', 'Led gathering of the requirements', 'Checked codebase and tested the software', 'Designed the f