In [76]:
# Import necessary libraries
import pandas as pd
import numpy as np
import json
import requests  # For making API calls
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [77]:
# Function to call external API to evaluate match quality
def external_api_evaluate_match(matches, api_key):
    api_url = "https://staging-phloneiron-case-backend-57k2.encr.app/score"
    response = requests.post(api_url, data=json.dumps({"matches": matches}), headers={"Authorization": f"Bearer {api_key}"})
    print(f"API Response: {response.json()}")  # Debug print
    if response.status_code == 200:
        score = response.json()
    else:
        score = {}
    return score

In [78]:
from google.colab import files
uploaded = files.upload()

Saving candidates.csv to candidates.csv
Saving companies.csv to companies.csv
Saving vacancies.csv to vacancies.csv


In [79]:

# Load the data (Replace these lines with actual data loading code in Google Colab)
candidates_df = pd.read_csv("candidates.csv")
companies_df = pd.read_csv("companies.csv")
vacancies_df = pd.read_csv("vacancies.csv")

In [80]:
# Simplified text preprocessing
def simple_preprocess_text(text):
    text = text.lower()
    tokens = text.split()
    filtered_tokens = [token for token in tokens if token.isalnum()]
    return ' '.join(filtered_tokens)

In [81]:
# Apply text preprocessing
candidates_df['processed_mission'] = candidates_df['mission_statement'].apply(simple_preprocess_text)
companies_df['processed_mission'] = companies_df['mission_statement'].apply(simple_preprocess_text)
vacancies_df['processed_description'] = vacancies_df['description'].apply(simple_preprocess_text)

In [82]:
# Feature engineering using TfidfVectorizer
combined_corpus = pd.concat([candidates_df['processed_mission'], companies_df['processed_mission'], vacancies_df['processed_description']], ignore_index=True)
tfidf_vectorizer = TfidfVectorizer()
tfidf_vectorizer.fit(combined_corpus)

candidates_tfidf = tfidf_vectorizer.transform(candidates_df['processed_mission'])
companies_tfidf = tfidf_vectorizer.transform(companies_df['processed_mission'])
vacancies_tfidf = tfidf_vectorizer.transform(vacancies_df['processed_description'])

In [83]:
# Calculate cosine similarity
candidate_company_sim = cosine_similarity(candidates_tfidf, companies_tfidf)
candidate_vacancy_sim = cosine_similarity(candidates_tfidf, vacancies_tfidf)

In [84]:
# Function to get top N matches for each vacancy
def get_top_matches(candidate_vacancy_sim, candidate_company_sim, top_n=3, api_key=None):
    top_matches = {}
    matches_for_api = []

    for vacancy_id in range(candidate_vacancy_sim.shape[1]):
        vacancy_sim_scores = candidate_vacancy_sim[:, vacancy_id]
        company_id = vacancies_df.loc[vacancy_id, 'company_id']
        company_sim_scores = candidate_company_sim[:, company_id]

        # Calculate combined similarity
        combined_sim_scores = (vacancy_sim_scores + company_sim_scores) / 2.0

        top_candidate_indices = np.argsort(combined_sim_scores)[::-1][:top_n]

        # Convert NumPy int64 to native Python int before appending to matches_for_api
        matches_for_api.append({
            "vacancy_id": int(vacancy_id),
            "company_id": int(company_id),
            "candidate_ids": [int(idx) for idx in top_candidate_indices.tolist()]
        })

    # Evaluate matches using the external API
    print(f"Matches for API: {matches_for_api}")  # Debug print
    api_scores = external_api_evaluate_match(matches_for_api, api_key)

    # Finalize top matches based on API scores (implementation will depend on API response structure)
    top_matches = api_scores  # Replace this with the actual logic based on API response structure

    return top_matches


In [85]:
# Your actual API key here
api_key = "f0302950-8bf9-4dd6-97f8-2b661e0b58e2"

In [86]:
# Get the top 3 matches for each vacancy
top_3_matches = get_top_matches(candidate_vacancy_sim, candidate_company_sim, top_n=3, api_key=api_key)

Matches for API: [{'vacancy_id': 0, 'company_id': 97, 'candidate_ids': [109, 33, 113]}, {'vacancy_id': 1, 'company_id': 50, 'candidate_ids': [160, 124, 97]}, {'vacancy_id': 2, 'company_id': 58, 'candidate_ids': [113, 53, 63]}, {'vacancy_id': 3, 'company_id': 50, 'candidate_ids': [160, 114, 97]}, {'vacancy_id': 4, 'company_id': 68, 'candidate_ids': [217, 68, 243]}, {'vacancy_id': 5, 'company_id': 51, 'candidate_ids': [7, 90, 95]}, {'vacancy_id': 6, 'company_id': 83, 'candidate_ids': [136, 329, 115]}, {'vacancy_id': 7, 'company_id': 18, 'candidate_ids': [299, 338, 260]}, {'vacancy_id': 8, 'company_id': 38, 'candidate_ids': [277, 4, 60]}, {'vacancy_id': 9, 'company_id': 68, 'candidate_ids': [68, 97, 43]}, {'vacancy_id': 10, 'company_id': 80, 'candidate_ids': [63, 97, 46]}, {'vacancy_id': 11, 'company_id': 100, 'candidate_ids': [255, 100, 246]}, {'vacancy_id': 12, 'company_id': 19, 'candidate_ids': [336, 285, 90]}, {'vacancy_id': 13, 'company_id': 103, 'candidate_ids': [97, 28, 114]}, {'va

In [73]:
# Display or save the top 3 matches
print(top_3_matches)

{}


In [None]:
# The instructions reiterate that the primary goal is to optimally match a set of job-seeking candidates to a set of companies and their vacancies. The optimal matches should meet the following constraints:

# A large overlap between the personal values of the candidate and the company values of the company posting the vacancy.
# An area of expertise that is aligned with the candidate.
# The code have been running attempts to make these optimal matches by calculating the cosine similarity between the features (such as mission statements and job descriptions) of candidates, companies, and vacancies. It also tries to get a score for these matches from an external API.

# It appears that the match calculation is working as expected, but the API response might not be providing the information you need.

# Given the  instructions and the debug information i've provided, I suggest the following:

# Check if the API is specifically designed to evaluate matches based on personal values and areas of expertise, as described in the instructions. If not, we might want to adjust the feature engineering steps to better align with these criteria.