In [1]:
import pickle

import pandas as pd
import numpy as np
from sklearn.feature_extraction import DictVectorizer
import xgboost as xgb
import mlflow

In [2]:
# Point MLflow at the local SQLite tracking store and select the experiment
# this notebook works with.
#
# To browse the runs, start the UI from a shell (not from this notebook):
#   mlflow ui --backend-store-uri sqlite:///mlflow/mlflow.db --default-artifact-root mlflow
mlflow.set_tracking_uri(uri="sqlite:///mlflow/mlflow.db")
# Kept as the cell's last expression so the selected Experiment is displayed.
mlflow.set_experiment(experiment_name="political_engagement")

<Experiment: artifact_location='/home/adi/projects/political-engagement-mlops/mlruns/1', creation_time=1726041395995, experiment_id='1', last_update_time=1726041395995, lifecycle_stage='active', name='political_engagement', tags={}>

In [3]:
# --- Retrieve the best model -------------------------------------------------
client = mlflow.MlflowClient(tracking_uri=mlflow.get_tracking_uri())

# Resolve the experiment by name rather than hard-coding its id: ids are
# assigned by the tracking store, so "1" is only correct for one particular
# database.
experiment = client.get_experiment_by_name("political_engagement")
if experiment is None:
    raise LookupError(
        "MLflow experiment 'political_engagement' was not found at "
        f"{mlflow.get_tracking_uri()}"
    )

# search_runs takes a *list* of experiment ids. Ordering ascending by the
# logged test_log_loss metric puts the lowest-loss run first, so one result
# is enough.
candidate_runs = client.search_runs(
    experiment_ids=[experiment.experiment_id],
    run_view_type=mlflow.entities.ViewType.ACTIVE_ONLY,
    max_results=1,
    order_by=["metrics.test_log_loss ASC"],
)
if not candidate_runs:
    raise LookupError(
        "No active runs found in MLflow experiment 'political_engagement'; "
        "train and log a model before scoring a batch."
    )
best_model = candidate_runs[0]

model_id = best_model.info.run_id
model_path = f"runs:/{model_id}/model"
model = mlflow.xgboost.load_model(model_path)

# --- Load the fitted preprocessor ---------------------------------------------
# SECURITY: pickle.load can execute arbitrary code while deserialising. Only
# load a preprocessor file that was produced by this project's own pipeline.
with open("./mlflow/preprocessor.bin", "rb") as fin:
    dv = pickle.load(fin)

# --- Load and vectorise the batch ---------------------------------------------
batch = pd.read_parquet("data/batches/wvs7_2024-01-01.parquet")
batch_dict = batch.to_dict(orient="records")
# NOTE(review): every column of the batch is passed to the vectorizer; confirm
# that the columns match what the preprocessor was fitted on.
batch_features = dv.transform(batch_dict)
batch_set = xgb.DMatrix(batch_features)

# --- Predict -------------------------------------------------------------------
# 0 = subject doesn't need intervention ("-"),
# 1 = subject could benefit from intervention ("CONTACT").
# Assumes model.predict returns one score per row that rounds to 0 or 1
# (e.g. a binary-classification probability) - TODO confirm against training.
predicted_class = np.round(model.predict(batch_set)).astype(int)
batch["prediction"] = pd.Series(predicted_class, index=batch.index).replace(
    [0, 1], ["-", "CONTACT"]
)
batch

Unnamed: 0,country,mode,settlement,respint,intprivacy,sex,immigrant,immigrant_mother,immigrant_father,birth_country,...,profession_father,employment_sector,chief_earner,savings,subjective_social_class,income_scale,religion,generation,subject_id,prediction
0,320,1,1,2,2,1,1,1,1,1,...,6,1,2,1,3,2,2,4,3c2e6776-95e6-4b24-bd03-c130c7f1b030,-
1,643,2,3,1,1,2,2,1,2,0,...,8,2,1,2,5,1,4,1,5cf5a4fa-c790-4703-bc85-9e6bc5318ea9,-
2,400,1,4,2,1,1,1,1,1,1,...,5,3,2,2,3,2,6,3,13137a2d-1b79-4ea3-bbc4-e26fdce86140,CONTACT
3,434,1,2,1,1,1,1,1,1,1,...,6,2,2,2,4,1,6,3,59cfb01a-ddeb-4dee-82ee-47629a779325,-
4,840,3,3,1,1,2,1,1,2,1,...,10,1,1,2,3,1,1,2,a6f4efbb-4df7-43b7-9f0a-b991a8a3a855,-
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7464,586,1,5,1,2,1,1,1,1,1,...,8,3,1,1,2,2,6,3,90301655-145e-4771-be70-58afb522b076,-
7465,716,1,2,1,1,1,1,1,2,1,...,5,1,1,3,3,2,2,3,be3b20a6-7cb7-4250-b36e-d67ef86e440b,-
7466,862,2,5,2,1,2,1,1,1,1,...,10,2,2,2,4,2,2,4,18512198-6977-4b52-b38e-fc15aaac3949,-
7467,32,2,3,1,1,1,1,1,1,1,...,5,2,1,1,3,2,2,1,4e64f4ae-c2a2-4b74-ab23-48af6e139465,-
