In [3]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
import pennylane as qml
from pennylane.optimize import NesterovMomentumOptimizer
from scipy.sparse import lil_matrix
import random

# Step 1: Generate varied synthetic data for resumes and job descriptions
def generate_synthetic_resumes(num_resumes):
    """Build a list of randomly composed one-sentence-pair resume strings.

    Every resume follows the template
    ``"Experienced <domain> professional. <experience> using <skill>."``
    where each placeholder is drawn independently from a fixed pool with the
    module-level ``random`` generator.

    Args:
        num_resumes: Number of resume strings to produce.

    Returns:
        A list containing ``num_resumes`` resume strings.
    """
    domain_pool = ['Data Science', 'Software Engineering', 'Marketing', 'Product Management', 'Data Engineering']
    skill_pool = ['Python', 'R', 'SQL', 'Machine Learning', 'Deep Learning', 'TensorFlow', 'Tableau', 'Power BI', 'Java', 'C++', 'Marketing Strategies', 'Cloud Computing']
    experience_pool = ['Led a team of data scientists', 'Developed machine learning models', 'Built cloud-based systems', 'Managed projects end-to-end', 'Created data pipelines']

    def _compose_resume():
        # Draw order is domain -> experience -> skill; keeping it fixed means a
        # seeded ``random`` yields the same sequence of resumes.
        field = random.choice(domain_pool)
        background = random.choice(experience_pool)
        tool = random.choice(skill_pool)
        return f"Experienced {field} professional. {background} using {tool}."

    return [_compose_resume() for _ in range(num_resumes)]

def generate_synthetic_jobs(num_jobs):
    """Build a list of randomly composed job-description strings.

    Every description follows the template
    ``"<title> - <responsibility>. Required skills: <skill>."``
    where each placeholder is drawn independently from a fixed pool with the
    module-level ``random`` generator.

    Args:
        num_jobs: Number of job descriptions to produce.

    Returns:
        A list containing ``num_jobs`` job-description strings.
    """
    title_pool = ['Data Scientist', 'Backend Developer', 'Marketing Manager', 'Cloud Engineer', 'Product Manager']
    skill_pool = ['Machine Learning', 'Backend Systems', 'Data Analysis', 'Deep Learning', 'Cloud Infrastructure', 'Agile Methodology', 'Marketing Strategy']
    duty_pool = ['Develop algorithms', 'Build scalable applications', 'Manage marketing campaigns', 'Create cloud-based solutions', 'Lead cross-functional teams']

    def _compose_job():
        # Draw order is title -> responsibility -> skill; keeping it fixed
        # means a seeded ``random`` yields the same sequence of jobs.
        title = random.choice(title_pool)
        duty = random.choice(duty_pool)
        requirement = random.choice(skill_pool)
        return f"{title} - {duty}. Required skills: {requirement}."

    return [_compose_job() for _ in range(num_jobs)]

# Dataset sizes: number of candidate resumes and number of open jobs
num_resumes, num_jobs = 50, 20

# Resumes are generated before jobs, so the shared ``random`` stream is
# consumed in that order.
synthetic_resumes = generate_synthetic_resumes(num_resumes)
synthetic_jobs = generate_synthetic_jobs(num_jobs)

# Step 2: Turn the texts into vectors and score every (resume, job) pair
# with cosine similarity.

# One vocabulary is learned over resumes and jobs together so both sides live
# in the same feature space; max_features caps the number of features.
vectorizer = TfidfVectorizer(max_features=5000)
all_text = [*synthetic_resumes, *synthetic_jobs]
text_vectors = vectorizer.fit_transform(all_text)

# Rows keep the concatenation order: resumes first, then jobs.
resume_vectors, job_vectors = text_vectors[:num_resumes], text_vectors[num_resumes:]

# Dimensionality reduction with TruncatedSVD: at most 50 components, never
# more than the number of TF-IDF features.
feature_count = resume_vectors.shape[1]
n_components = min(50, feature_count)
svd = TruncatedSVD(n_components=n_components)

# The projection is fitted on the resumes only and then applied to the jobs.
reduced_resume_vectors = svd.fit_transform(resume_vectors)
reduced_job_vectors = svd.transform(job_vectors)

# compatibility_matrix[i, j] is the cosine similarity of resume i and job j
# in the reduced space.
compatibility_matrix = cosine_similarity(reduced_resume_vectors, reduced_job_vectors)

# Step 3: Frame the problem as a QUBO
# Binary variable k = i * n_jobs + j stands for "candidate i takes job j".
lambda_constraint = 10  # Penalty weight for constraints
n_candidates, n_jobs = num_resumes, num_jobs
n_variables = n_candidates * n_jobs

# Sparse QUBO matrix, filled entry by entry (lil_matrix is used for that)
qubo_matrix = lil_matrix((n_variables, n_variables))

# Single pass over all variables. Each off-diagonal cell is written exactly
# once because a pair of variables shares either a candidate or a job, never
# both, and each pair is visited only from its lower-indexed member.
for cand in range(n_candidates):
    row_offset = cand * n_jobs
    for job in range(n_jobs):
        var = row_offset + job

        # Diagonal: negative compatibility, so better matches lower the cost
        qubo_matrix[var, var] = -compatibility_matrix[cand, job]

        # Row constraint: same candidate paired with a later job
        for later_job in range(job + 1, n_jobs):
            rival = row_offset + later_job
            qubo_matrix[var, rival] += lambda_constraint
            qubo_matrix[rival, var] += lambda_constraint

        # Column constraint: same job paired with a later candidate
        for later_cand in range(cand + 1, n_candidates):
            rival = later_cand * n_jobs + job
            qubo_matrix[var, rival] += lambda_constraint
            qubo_matrix[rival, var] += lambda_constraint

# Step 4: Classical local-search optimisation of the QUBO
#
# The QUBO energy is only defined on 0/1 vectors. Thresholding a continuous
# parameter vector makes the cost piecewise constant, so a gradient-based
# optimiser sees a zero gradient and never moves the parameters. The search
# below instead flips one bit at a time, always taking the flip that lowers
# the energy the most, until no single flip helps.
def cost_fn(params):
    """Return the QUBO energy of ``params`` after thresholding at 0.5.

    Args:
        params: 1-D array with one entry per (candidate, job) variable;
            entries greater than 0.5 count as selected.

    Returns:
        The scalar ``x @ qubo_matrix @ x`` for the thresholded 0/1 vector ``x``.
    """
    binary_vector = (params > 0.5).astype(int)
    return binary_vector @ qubo_matrix @ binary_vector.T


n_variables = n_candidates * n_jobs

# Split the energy into per-variable terms (diagonal) and pair terms.
# pair_terms[k, l] = Q[k, l] + Q[l, k], so it is symmetric.
linear_terms = qubo_matrix.diagonal()
pair_terms = (qubo_matrix + qubo_matrix.T).tocsr()

# Random initialisation, thresholded to a starting 0/1 state
params = np.random.rand(n_variables)
state = (params > 0.5).astype(float)

# flip_field[k] = Q[k, k] + sum over l != k of pair_terms[k, l] * state[l].
# pair_terms @ state also contains the l == k term (2 * Q[k, k] * state[k]),
# which is subtracted again here.
flip_field = linear_terms + pair_terms @ state - 2.0 * linear_terms * state

max_flips = 10 * n_variables  # Hard cap so the loop always terminates
improvement_tol = 1e-12  # Ignore "improvements" that are only rounding noise

for _ in range(max_flips):
    # Flipping bit k changes the energy by (1 - 2 * state[k]) * flip_field[k]
    energy_change = (1.0 - 2.0 * state) * flip_field
    best = int(np.argmin(energy_change))
    if energy_change[best] >= -improvement_tol:
        break  # Local minimum: no single flip lowers the energy

    direction = 1.0 - 2.0 * state[best]  # +1 turns the bit on, -1 turns it off
    state[best] += direction

    # Update the fields of all other variables; a variable's own field does
    # not depend on its own value, so the diagonal entry is masked out.
    neighbour_terms = pair_terms[best].toarray().ravel()
    neighbour_terms[best] = 0.0
    flip_field += direction * neighbour_terms

# Convert results to binary assignment matrix
params = state
binary_vector = state.astype(int)
binary_assignment = binary_vector.reshape((n_candidates, n_jobs))

# Step 5: Greedy Refinement
# Build a one-to-one matching by repeatedly taking the most compatible pair
# whose candidate and job are both still free. Pairs proposed in
# binary_assignment are preferred; once none of them is available, any free
# pair may be used so the matching still reaches min(n_candidates, n_jobs).
#
# Ineligible pairs are masked with -inf rather than multiplied by 0: a zero
# would outrank every non-positive compatibility score and could select a
# candidate or job that is already taken.
final_assignment = np.zeros_like(binary_assignment)
proposed_pairs = binary_assignment.astype(bool)  # binary_assignment itself is left unmodified
candidate_free = np.ones(n_candidates, dtype=bool)
job_free = np.ones(n_jobs, dtype=bool)

# Each iteration consumes exactly one candidate and one job, so a free pair
# always exists and the loop performs exactly min(n_candidates, n_jobs) steps.
for _ in range(min(n_candidates, n_jobs)):
    free_pairs = np.outer(candidate_free, job_free)
    eligible = free_pairs & proposed_pairs
    if not eligible.any():
        eligible = free_pairs  # Proposals exhausted: fall back to any free pair

    scores = np.where(eligible, compatibility_matrix, -np.inf)
    candidate_idx, job_idx = np.unravel_index(np.argmax(scores), scores.shape)

    final_assignment[candidate_idx, job_idx] = 1
    candidate_free[candidate_idx] = False
    job_free[job_idx] = False

# Step 6: Interpret Results
# Dump the raw score matrix and the final 0/1 matching, then describe each
# candidate's outcome in a sentence.
print("Compatibility Matrix:")
print(compatibility_matrix)

print("\nBinary Assignment Matrix:")
print(final_assignment)

print("\nInterpretation:")
for candidate_idx, assignment in enumerate(final_assignment):
    matched_jobs = np.flatnonzero(assignment == 1)
    if matched_jobs.size == 0:
        print(f"Candidate {candidate_idx + 1} ('{synthetic_resumes[candidate_idx]}') is not assigned to any job.")
        continue

    # Only the first matched job in the row is reported
    job_idx = matched_jobs[0]
    print(f"Candidate {candidate_idx + 1} ('{synthetic_resumes[candidate_idx]}') is assigned to Job {job_idx + 1} ('{synthetic_jobs[job_idx]}').")




Compatibility Matrix:
[[-7.98699651e-17 -2.80071068e-17 -1.47064019e-16 -6.96807654e-17
  -4.58553734e-18 -1.01052158e-16 -5.98574150e-17 -4.48048178e-17
  -1.80310253e-16  9.68517408e-02 -1.27024156e-16 -5.97472255e-17
  -5.62531218e-17 -1.47391684e-17  1.35691908e-17 -1.45007052e-17
  -6.24500451e-17  1.16844733e-01 -4.16333634e-17  6.93889390e-18]
 [ 5.44223417e-17 -1.39780633e-17  3.25196207e-01  8.97279656e-17
   2.40626710e-01  5.06223988e-17 -6.99201251e-18  8.51023285e-17
  -6.34805283e-17  4.52035439e-02  3.60490355e-18  2.72214917e-01
   1.21319790e-17 -5.78257915e-18  2.59736815e-01  3.08101218e-01
   5.20417043e-17  5.45348588e-02  2.94239357e-01  4.85722573e-17]
 [-3.72584535e-17 -6.39767960e-17  7.18459760e-02 -2.24551650e-16
   1.42020042e-01 -1.72663606e-16 -1.47980865e-16 -1.56343682e-16
  -9.54351751e-17  3.85289610e-02  1.63911201e-18  6.01407579e-02
  -6.05748018e-17 -8.53346898e-17  1.53298997e-01  6.80691602e-02
  -1.52655666e-16  4.64824496e-02  6.50066431e-02 -1