In [1]:
import numpy as np
import pandas as pd
import pickle
import sys
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
# Input file locations.
# All three CSVs live under one project data directory; the shared prefix is
# spelled out once so the individual paths only differ in their tail.
DATA_ROOT = 'C:/Users/ashua/Desktop/Inelligent Job Recomendation Engine/data'
FEATURE_DATA_DIR = DATA_ROOT + '/Feature Engineering Data'

# Per-resume and per-job skill feature tables.
resume_skil_features_path = FEATURE_DATA_DIR + '/resume_skill_features.csv'
job_skil_features_path = FEATURE_DATA_DIR + '/job_skill_features.csv'

# Labelled resume/job pairs used to fit the classifier.
training_data_path = DATA_ROOT + '/Supervised Training/supervised_training_data.csv'

In [3]:
# Define the helper that retrains the model so we obtain the final fitted model object.
# In a production environment, the model would be loaded from a saved pickle/joblib file.
# Here, we quickly retrain it using the same logic as the previous script to get the object.

def get_trained_model(X, y):
    """Fit and return the Random Forest classifier used for recommendations.

    The data is split 80/20 with a fixed seed, stratified on ``y``; the model
    is fitted on the 80% training partition only and the held-out partition
    is discarded.

    Parameters
    ----------
    X : feature matrix handed to ``train_test_split``.
    y : target labels; also used as the stratification key.

    Returns
    -------
    The fitted ``RandomForestClassifier``.
    """
    print("Retraining the final Random Forest Model (best Performer) for deployment...")

    # train_test_split returns [X_train, X_test, y_train, y_test];
    # only the two training pieces are needed here.
    partitions = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
    train_features = partitions[0]
    train_labels = partitions[2]

    # Fixed hyper-parameters and seed; n_jobs=-1 uses all available cores.
    forest = RandomForestClassifier(
        n_estimators=200,
        max_depth=10,
        random_state=42,
        n_jobs=-1,
    )
    forest.fit(train_features, train_labels)

    return forest

In [4]:
# Load the training data and the resume/job feature tables.
# The feature tables are indexed by their ID column so individual resumes and
# jobs can be looked up by ID later on.
print("--- Starting Final Job Recommendation Engine ---")

try:
    training_data = pd.read_csv(training_data_path)
    resume_features_df = pd.read_csv(resume_skil_features_path).set_index('ID')
    job_features_df = pd.read_csv(job_skil_features_path).set_index('job_id')

except FileNotFoundError as e:
    print(f"Error: Missing required data file. Please ensure '{e.filename}' is available.")
    # Re-raise instead of calling exit(): exit() is an interactive-shell helper,
    # and later cells cannot run without these frames anyway. Re-raising stops
    # execution here and keeps the original traceback.
    raise

except KeyError as e:
    # Raised by set_index when the 'ID' or 'job_id' column is missing from a feature file.
    print(f"Error: Missing required index column {e} in one of the feature files. Check Phase 2 output files.")
    raise

--- Starting Final Job Recommendation Engine ---


In [5]:
# Select the model inputs: every training column prefixed 'R_' (resume side)
# or 'J_' (job side), in the order they appear in the training data.
feature_columns = [c for c in training_data.columns if c.startswith(('R_', 'J_'))]

X_train_full = training_data[feature_columns]
y_train_full = training_data['label']

# Recover the base skill names by stripping ONLY the leading 'R_' prefix.
# str.replace('R_', '') would also delete any later 'R_' inside the name
# (e.g. 'R_HR_management' -> 'Hmanagement'), corrupting the rebuilt lists.
skill_cols_base = [c[len('R_'):] for c in feature_columns if c.startswith('R_')]
resume_skill_cols = [f'R_{c}' for c in skill_cols_base]
job_skill_cols = [f'J_{c}' for c in skill_cols_base]

# Resume-side columns first, then the matching job-side columns.
all_feature_cols = resume_skill_cols + job_skill_cols

# Retrain the model to get the deployable object
final_model = get_trained_model(X_train_full, y_train_full)
print("Final model ready for deployment.")

Retraining the final Random Forest Model (best Performer) for deployment...
Final model ready for deployment.


In [6]:
# Recommendation function
def generate_recommendations(resume_id, job_ids, final_model, resume_features, job_features, feature_cols):
    """Score one resume against a set of jobs and return the jobs ranked by match probability.

    Parameters
    ----------
    resume_id : label looked up in ``resume_features.index``.
    job_ids : iterable of job labels; labels missing from ``job_features.index`` are skipped.
    final_model : fitted classifier exposing ``predict_proba``; column 1 of its
        output is used as the match score.
    resume_features : DataFrame of resume feature rows, indexed by resume ID.
    job_features : DataFrame of job feature rows, indexed by job ID.
    feature_cols : accepted for interface compatibility; not read by this function.

    Returns
    -------
    DataFrame with columns ``rank``, ``job_id`` and ``predicted_score``, sorted by
    descending score (ties keep their input order). An empty DataFrame is
    returned when the resume ID is unknown or no requested job ID is found.
    """
    print(f"\n2.1. Generating recommendations for Resume ID: {resume_id} against {len(job_ids)} jobs...")

    # Check that the target resume exists before extracting its features.
    if resume_id not in resume_features.index:
        print(f"Error: Resume ID {resume_id} not found in features data.")
        return pd.DataFrame()

    resume_vec = np.asarray(resume_features.loc[resume_id].values)
    if resume_vec.ndim > 1:
        # A non-unique index makes .loc return several rows; a 2-D vector would
        # break the tile/hstack step below, so fall back to the first row.
        print(f"Warning: Resume ID {resume_id} has multiple feature rows; using the first one.")
        resume_vec = resume_vec[0]

    # Keep only the job IDs that have a feature row.
    job_ids_valid = [j_id for j_id in job_ids if j_id in job_features.index]

    if not job_ids_valid:
        print("Warning: None of the provided job IDs were found in the job features data.")
        return pd.DataFrame()

    job_rows = job_features.loc[job_ids_valid]
    job_matrix = job_rows.values

    # Build the prediction matrix: one row per selected job, with the resume
    # features repeated on the left and that job's features on the right.
    resume_matrix = np.tile(resume_vec, (len(job_rows), 1))
    X_predict = np.hstack((resume_matrix, job_matrix))

    # Probability of the positive class (match = 1).
    match_probabilities = final_model.predict_proba(X_predict)[:, 1]

    # Take the IDs from the rows actually selected so the two columns always
    # have the same length, even if job_features has a non-unique index.
    recommendations_df = pd.DataFrame({
        'job_id': job_rows.index.tolist(),
        'predicted_score': match_probabilities,
    })

    # Stable sort: jobs with equal scores keep their input order, so ranks are
    # reproducible from run to run.
    recommendations_df = recommendations_df.sort_values(
        by='predicted_score', ascending=False, kind='mergesort'
    ).reset_index(drop=True)
    recommendations_df['rank'] = recommendations_df.index + 1

    return recommendations_df[['rank', 'job_id', 'predicted_score']]



In [7]:
# Demo run: score one resume from the loaded feature table against a slice of jobs.

# First resume ID present in the resume feature table.
SAMPLE_RESUME_ID = resume_features_df.index[0]
# Job IDs at index positions 5 through 14 of the job feature table.
SAMPLE_JOB_IDS = job_features_df.index[5:15].tolist()

# Arguments are passed positionally, in the order of the function signature:
# resume_id, job_ids, final_model, resume_features, job_features, feature_cols.
recommendations = generate_recommendations(
    SAMPLE_RESUME_ID,
    SAMPLE_JOB_IDS,
    final_model,
    resume_features_df,
    job_features_df,
    all_feature_cols,
)


2.1. Generating recommendations for Resume ID: 16852973 against 10 jobs...




In [9]:
# Final output: print the ranked recommendation table, or a failure notice.
if not recommendations.empty:
    print("--- FINAL JOB RECOMMENDATION LIST ---")
    print(f"Target Resume: {SAMPLE_RESUME_ID}")
    # Per-column float formats (rank, job_id, predicted_score). A single
    # ".4f" is applied to every numeric column, which rendered rank and
    # job_id as "1.0000" / "35982263.0000"; only the score needs decimals.
    print(recommendations.to_markdown(index=False, floatfmt=(".0f", ".0f", ".4f")))
    print("\nRecommendation Complete.")
else:
    print("\nRecommendation failed or returned an empty set.")

--- FINAL JOB RECOMMENDATION LIST ---
Target Resume: 16852973
|    rank |         job_id |   predicted_score |
|--------:|---------------:|------------------:|
|  1.0000 |  35982263.0000 |            0.2029 |
|  2.0000 |  56924323.0000 |            0.1628 |
|  3.0000 |  83789755.0000 |            0.1353 |
|  4.0000 |  23221523.0000 |            0.1353 |
|  5.0000 |  91700727.0000 |            0.1180 |
|  6.0000 |  11009123.0000 |            0.1039 |
|  7.0000 | 103254301.0000 |            0.1039 |
|  8.0000 |  95428182.0000 |            0.0930 |
|  9.0000 | 111513530.0000 |            0.0797 |
| 10.0000 |  69333422.0000 |            0.0481 |

Recommendation Complete.
