In [None]:
!pip install -e .

In [2]:
import datetime

from instaffo_matching.utils.logging import setup_logger
from instaffo_matching.data.loader import load_data, get_matching_dataframes
from instaffo_matching.data.preprocessor import standardize_data
from instaffo_matching.models.ranker import TalentJobRanker

  from pandas.core import (


In [3]:
# Set up logger
logger = setup_logger()

In [4]:
# Load data
data = load_data("../data/data.json")
talent_df, job_df, labels_df = get_matching_dataframes(data=data)

In [5]:
# Clean / Preprocess data
talent_df, job_df = standardize_data(talent_df, job_df)
logger.info("Data cleaned successfully")

2024-08-03 18:24:27 INFO [258458891.py:3]: Data cleaned successfully


In [6]:
ranker = TalentJobRanker()
ranker.fit(talent_df, job_df, labels_df)
logger.info("Model fitted successfully")

2024-08-03 18:24:27 INFO [ranker.py:91]: Initialized a new GradientBoostingStrategy model and FeatureEngineer.
2024-08-03 18:24:27 INFO [ranker.py:147]: Starting training process.
2024-08-03 18:24:52 INFO [ranker.py:189]: Model training completed.
2024-08-03 18:24:52 INFO [ranker.py:200]: Confusion Matrix:
[[199   1]
 [  2 198]]
2024-08-03 18:24:52 INFO [ranker.py:201]: Classification Report:
              precision    recall  f1-score   support

       False       0.99      0.99      0.99       200
        True       0.99      0.99      0.99       200

    accuracy                           0.99       400
   macro avg       0.99      0.99      0.99       400
weighted avg       0.99      0.99      0.99       400

2024-08-03 18:24:52 INFO [2178404414.py:3]: Model fitted successfully


In [7]:
# Optional: Save the trained model
formatted_time = datetime.datetime.now().strftime("%d_%m_%Y")
model_path = f"../models_artifacts/model_{formatted_time}.joblib"
await ranker.save_model(model_path)
logger.info("Model saved successfully")

2024-08-03 18:24:52 INFO [ranker.py:263]: Model and feature engineer saved to ../models_artifacts/model_03_08_2024.joblib
2024-08-03 18:24:52 INFO [3702628301.py:5]: Model saved successfully


In [8]:
# Example prediction
sample_talent = talent_df.iloc[0:50]
sample_job = job_df.iloc[0:50]
label, score = ranker.predict(sample_job, sample_talent)
logger.info(f"Predicted label: {label}, score: {score}")

2024-08-03 18:24:53 INFO [ranker.py:227]: Prediction made successfully.
2024-08-03 18:24:53 INFO [2246068842.py:5]: Predicted label: [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True], score: [0.9949852427113618, 0.9949852427113618, 0.9148238930524275, 0.9941382116070482, 0.995887294410425, 0.9955687416488119, 0.9946154585736674, 0.9951340841209061, 0.9506219026541392, 0.9947539630662372, 0.9953970167330566, 0.9906679605744976, 0.9954686623791674, 0.9950373333101156, 0.9949852427113618, 0.9954686623791674, 0.9650786127074411, 0.9959850965870671, 0.9947519663408239, 0.9602686333898094, 0.9949852427113618, 0.995492579403344, 0.9883886203474123, 0.9950373333101156, 0.9924236413235916, 0.992431936785778, 0.994007327087816, 0.9949852427113618, 0