In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import torch
import os
import pandas as pd

import sys
sys.path.append(os.path.dirname(os.getcwd()))

from constants import TRACKING_SERVER_URI, DATA_DIR, EXPERIMENT_PREFIX
from utils.plots import roc_curve_plot
from utils.load_data import get_dfs
from utils.data_preprocessor import DataPreprocessor
from sklearn.metrics import classification_report, f1_score
from torch.utils.data import DataLoader

# Point the MLflow client at the tracking server configured in `constants`.
mlflow.set_tracking_uri(TRACKING_SERVER_URI)

# Seed the global NumPy and PyTorch RNGs so random draws made later in the
# notebook are repeatable across Restart & Run All.
np.random.seed(13)
torch.manual_seed(13)

In [None]:
# Ask MLflow not to render its artifact-transfer progress bar (going by the
# variable name), which would otherwise clutter output when many runs are loaded.
os.environ["MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR"] = "False"

In [None]:
# Read the notebook configuration from the JSON file next to the notebook.
with open("config.json", "r") as f:
    config = json.load(f)

# Constants derived from the configuration:
#   GROUP      -> "Grupo" followed by the configured group identifier
#   MODEL_ARCH -> value of the 'model_arch' entry
#   COMPARISON -> value of the 'comparison' entry
GROUP = f"Grupo{config['group']}"
MODEL_ARCH, COMPARISON = config['model_arch'], config['comparison']

In [None]:
# Locate and load the per-group parameters file.
GROUP_DIR = os.path.join(DATA_DIR, GROUP)
GROUP_PARAMS_FILE = os.path.join(GROUP_DIR, f"params_{GROUP}.json")

# Fail fast with an explicit error: everything below depends on `group_params`,
# so merely printing a message would only postpone the failure to a confusing
# NameError a few lines later.
if not os.path.exists(GROUP_PARAMS_FILE):
    raise FileNotFoundError(f"Group params file not found: {GROUP_PARAMS_FILE}")

with open(GROUP_PARAMS_FILE, 'r') as f:
    group_params = json.load(f)

# Number of lagged periods used as temporal features: one less than the value
# stored under 'first_tr_period'.
REQ_PERIODS = group_params['first_tr_period'] - 1
# Temporal feature column names, from the oldest lag 'y(t-REQ_PERIODS)' down to 'y(t-1)'.
TEMP_FEATS = [f'y(t-{i})' for i in range(REQ_PERIODS, 0, -1)]
# Static (non-temporal) feature column names.
STAT_FEATS = ['inicio_prog']
# Full feature list: static features first, then temporal ones.
FEATS = STAT_FEATS + TEMP_FEATS

N_PER_DEP = group_params['n_per_dep']

In [None]:
def get_best_threshold_for_f1(y_test_true, y_test_pred_prob):
    """Grid-search the decision threshold that maximizes the F1 score.

    Thresholds 0.00, 0.01, ..., 1.00 are tried; a sample is assigned to the
    positive class when its probability is greater than or equal to the
    threshold.

    Args:
        y_test_true: Array-like of ground-truth binary labels.
        y_test_pred_prob: Array-like of predicted positive-class probabilities.
            Plain Python lists (such as the ones returned by `evaluate_model`)
            are accepted as well as NumPy arrays.

    Returns:
        Tuple `(best_thresh, best_f1)`. When several thresholds reach the same
        maximal F1, the lowest one is returned, because `np.argmax` reports the
        first maximum.
    """
    # Coerce once so the element-wise comparison below also works for lists.
    y_test_pred_prob = np.asarray(y_test_pred_prob)

    thresholds = np.linspace(0, 1, 101)  # 0.00, 0.01, ..., 1.00
    f1_scores = [
        f1_score(y_test_true, (y_test_pred_prob >= thresh).astype(int))
        for thresh in thresholds
    ]

    best_idx = np.argmax(f1_scores)
    return thresholds[best_idx], f1_scores[best_idx]

def evaluate_model(model, test_set):
    """Run `model` over `test_set` on the CPU and collect targets and raw outputs.

    Each batch yielded by the loader is expected to be a sequence whose last
    element is the target and whose preceding elements are the model inputs,
    passed to `model` positionally: `(X, y)` calls `model(X)`, while
    `(X_temporal, X_static, y)` calls `model(X_temporal, X_static)`.

    Args:
        model: A `torch.nn.Module`. It is switched to eval mode and moved to
            the CPU as a side effect.
        test_set: Dataset accepted by `torch.utils.data.DataLoader`.

    Returns:
        Tuple `(y_test_true, y_test_pred)` of Python lists holding the targets
        and the raw model outputs (no sigmoid applied here), in dataset order.

    Raises:
        ValueError: If a batch does not contain at least one input and a target.
    """
    def _as_list(tensor):
        # squeeze() turns a single-sample batch into a 0-d tensor, whose
        # tolist() is a bare scalar that list.extend() cannot consume.
        # Restore one dimension in that case so the result is always a list.
        values = tensor.squeeze().cpu()
        if values.dim() == 0:
            values = values.reshape(1)
        return values.tolist()

    test_loader = DataLoader(test_set, batch_size=64, shuffle=False)

    y_test_pred = []
    y_test_true = []

    model.eval()
    model.to('cpu')
    with torch.no_grad():
        for batch in test_loader:
            # Decide the call signature from the batch structure itself rather
            # than by catching ValueError, which would also swallow a
            # ValueError raised inside the model's forward pass.
            *inputs, y = batch
            if not inputs:
                raise ValueError(
                    "Each batch must contain at least one input tensor and a target."
                )
            inputs = [x.to('cpu') for x in inputs]
            logits = model(*inputs)

            y_test_true.extend(_as_list(y))
            y_test_pred.extend(_as_list(logits))

    return y_test_true, y_test_pred

In [None]:
# Local directories of the MLflow file store (artifacts and run metadata).
# NOTE(review): these are machine-specific absolute paths; they must be
# adjusted when the notebook is run anywhere else.
ARTIFACTS_PATH = "/home/basbenja/Facultad/TrabajoFinal/mountpoint/mlartifacts"
RUNS_PATH = "/home/basbenja/Facultad/TrabajoFinal/mountpoint/mlruns"

EXPERIMENT_NAME = f"{EXPERIMENT_PREFIX}-{GROUP}-Comp{COMPARISON}"

experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
# get_experiment_by_name returns None for an unknown name; stop here with a
# clear message instead of an AttributeError on the next line.
if experiment is None:
    raise ValueError(f"MLflow experiment not found: {EXPERIMENT_NAME}")

experiment_id = experiment.experiment_id
experiment_artifacts_path = os.path.join(ARTIFACTS_PATH, experiment_id)
experiment_runs_path = os.path.join(RUNS_PATH, experiment_id)

# Fetch the runs of this experiment that were trained with the configured architecture.
runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    output_format="list",
    filter_string=f"params.model_arch = '{MODEL_ARCH}'",
)

print(EXPERIMENT_NAME, experiment_id)
print(len(runs))

In [None]:
# For every selected run: rebuild its datasets, reload the logged model,
# re-evaluate it, compare the recomputed classification report with the logged
# one, and store the ROC AUC, the ROC plot and the best F1 threshold.
equal_reports = {}     # run name -> whether the recomputed report equals the logged one
best_thresholds = {}   # run name -> best threshold / best F1 / F1 at the 0.5 threshold

for i, run in enumerate(runs):
    run_info = run.info
    run_id = run_info.run_id
    run_name = run_info.run_name
    run_params = run.data.params
    run_artifact_uri = run.info.artifact_uri

    # Locations inside the local MLflow file store that are written to directly below.
    run_artifacts_path = os.path.join(experiment_artifacts_path, run_id, 'artifacts')
    run_params_path = os.path.join(experiment_runs_path, run_id, 'params')

    print(f"Starting for run: {run_name}. {i+1}/{len(runs)}")

    simulation = run_params['simulation']
    stata_filepath = os.path.join(GROUP_DIR, simulation + ".dta")
    df = pd.read_stata(stata_filepath)

    print("    Building datasets")
    type1_df, type2_df, type3_df = get_dfs(df, REQ_PERIODS)

    # Reuse the exact type-3 train/test split that was logged with the run.
    type3_train_ids_logged = mlflow.artifacts.load_dict(run_artifact_uri + "/ninis_ids_train.json")['ninis_ids_train']
    type3_test_ids_logged  = mlflow.artifacts.load_dict(run_artifact_uri + "/ninis_ids_test.json")['ninis_ids_test']

    # NOTE(review): the type-1 training ids are re-drawn at random here, not
    # loaded from the run. If the preprocessor fits anything on the training
    # set, this sample may differ from the one used at training time -- confirm.
    type1_ids = type1_df.index.unique()
    n_type1_train = 1000
    type1_train_ids = np.random.choice(type1_ids, n_type1_train, replace=False)
    type1_train_df = type1_df.loc[type1_train_ids]

    type3_train_df = type3_df.loc[type3_train_ids_logged]
    type3_test_df  = type3_df.loc[type3_test_ids_logged]

    train_df = pd.concat([type1_train_df, type3_train_df])
    X_train_df, y_train_df = train_df[FEATS], train_df['target']

    test_df = pd.concat([type2_df, type3_test_df])
    X_test_df, y_test_df = test_df[FEATS], test_df['target']

    data_preprocessor = DataPreprocessor(STAT_FEATS, TEMP_FEATS)
    train_set, test_set = data_preprocessor.build_datasets(
        X_train_df, X_test_df, y_train_df, y_test_df, MODEL_ARCH
    )

    print("    Loading model")
    model_uri = f"runs:/{run_id}/trained_model"
    model = mlflow.pytorch.load_model(model_uri)

    print("    Evaluating model")
    y_test_true, y_test_pred = evaluate_model(model, test_set)

    # The model outputs logits: apply the sigmoid, then classify at the 0.5 threshold.
    y_test_pred = np.array(y_test_pred)
    y_test_pred_prob = 1 / (1 + np.exp(-y_test_pred))
    y_test_pred_class = (y_test_pred_prob >= 0.5).astype(int)

    print("    Getting logged classification report")
    report_logged = mlflow.artifacts.load_dict(run_artifact_uri + "/classification_report.json")
    report_dict = classification_report(y_test_true, y_test_pred_class, output_dict=True)

    # Record the comparison before a possible early exit, so that a mismatching
    # run shows up in `equal_reports` as False instead of being left out.
    reports_match = report_logged == report_dict
    equal_reports[run_name] = reports_match

    if not reports_match:
        print(f"    Run {run_name} has a different classification report than logged.")
        print("    Logged report:")
        print(json.dumps(report_logged, indent=4))
        print("    Computed report:")
        print(json.dumps(report_dict, indent=4))
        break

    # Write the ROC AUC as a file inside the run's 'params' directory and save
    # the ROC plot inside the run's artifact directory.
    fpr, tpr, thresholds, area, fig, ax = roc_curve_plot(y_test_true, y_test_pred_prob)
    roc_auc_score_path = os.path.join(run_params_path, 'roc_auc_score')
    with open(roc_auc_score_path, 'w') as f:
        f.write(str(area))

    roc_auc_plot_path = os.path.join(run_artifacts_path, 'roc_curve_plot.png')
    fig.savefig(roc_auc_plot_path)
    plt.close(fig)  # release the figure; one is created per run

    best_thresh, best_f1 = get_best_threshold_for_f1(y_test_true, y_test_pred_prob)
    # The '1.0' key presumes the targets are the floats 0.0/1.0 -- verify if labels change.
    print(f"    Best threshold: {best_thresh:.2f} | F1 score: {best_f1:.4f} | Real F1 Score: {report_dict['1.0']['f1-score']:.4f}")
    best_thresholds[run_name] = {
        'best_threshold': best_thresh,
        'best_f1': best_f1,
        'real_f1': report_dict['1.0']['f1-score']
    }
    print()