In [None]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import json
import matplotlib.pyplot as plt
import mlflow
import numpy as np
import torch
import os
import pandas as pd
import pprint as pp
import re

import sys
sys.path.append(os.path.dirname(os.getcwd()))

from constants import TRACKING_SERVER_URI, DATA_DIR, EXPERIMENT_PREFIX
from utils.plots import confusion_matrix_plot
from utils.load_data import get_dfs
from utils.data_preprocessor import DataPreprocessor
from sklearn.metrics import classification_report, f1_score
from torch.utils.data import DataLoader

# Point every MLflow call made in this notebook at the project's tracking server.
mlflow.set_tracking_uri(TRACKING_SERVER_URI)

# Single seed shared by NumPy and PyTorch, kept in one place so both libraries
# always agree and the value is easy to change.
SEED = 13
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Switch off MLflow's artifact progress bar through its environment variable
# (presumably read by MLflow when artifacts are transferred; the loops below
# load artifacts once per run, which would otherwise clutter the output).
os.environ["MLFLOW_ENABLE_ARTIFACTS_PROGRESS_BAR"] = "False"

In [None]:
# Read the notebook-wide settings from the JSON config one directory above
# the notebook's working directory.
with open("../config.json", "r") as config_file:
    config = json.loads(config_file.read())

# Group label ("Grupo<N>"), model architecture and comparison id; these select
# which data directory and which MLflow experiment/runs are evaluated below.
GROUP = f"Grupo{config['group']}"
MODEL_ARCH = config['model_arch']
COMPARISON = config['comparison']

print(f"{GROUP} - {MODEL_ARCH}")

In [None]:
# Data directory of the selected group and the JSON file holding its parameters.
GROUP_DIR = os.path.join(DATA_DIR, GROUP)
GROUP_PARAMS_FILE = os.path.join(GROUP_DIR, f"params_{GROUP}.json")

# Fail fast when the file is missing. Merely printing a message and carrying on
# would either hit a NameError on `group_params` below or, worse, silently reuse
# a `group_params` left in the kernel from a previously evaluated group.
if not os.path.exists(GROUP_PARAMS_FILE):
    raise FileNotFoundError(f"Group params file not found: {GROUP_PARAMS_FILE}")

with open(GROUP_PARAMS_FILE, 'r') as f:
    group_params = json.load(f)

# Number of observed periods: everything before the first treatment period.
REQ_PERIODS = group_params['first_tr_period'] - 1
# Lagged-outcome column names, ordered from the oldest lag y(t-REQ_PERIODS)
# down to the most recent one y(t-1).
TEMP_FEATS = [f'y(t-{i})' for i in range(REQ_PERIODS, 0, -1)]
# Static (non-temporal) feature columns.
STAT_FEATS = ['inicio_prog']
# Full feature list: static columns first, then the temporal lags.
FEATS = STAT_FEATS + TEMP_FEATS

N_PER_DEP = group_params['n_per_dep']

print(f"Períodos observados:     {REQ_PERIODS}")
print(f"Períodos de dependencia: {N_PER_DEP}")

In [None]:
def get_best_threshold_for_f1(y_test_true, y_test_pred_prob):
    """Scan decision thresholds and return the one that maximises the F1 score.

    The 101 evenly spaced thresholds 0.00, 0.01, ..., 1.00 are tried; a sample
    is classified as positive when its probability is >= the threshold.

    Args:
        y_test_true: ground-truth binary labels (any array-like accepted by
            `f1_score`).
        y_test_pred_prob: predicted probabilities, one per sample. May be a
            NumPy array or any sequence convertible with `np.asarray`.

    Returns:
        (best_thresh, best_f1): the first threshold that reaches the highest
        F1 score, and that score.
    """
    # Coerce to an array so plain Python lists work as well: the element-wise
    # comparison below is not defined between a list and a float.
    y_test_pred_prob = np.asarray(y_test_pred_prob)

    thresholds = np.linspace(0, 1, 101)
    # zero_division=0 keeps the score at 0 for thresholds that predict no
    # positives (same value as the default) without emitting a warning for
    # each such threshold.
    f1_scores = [
        f1_score(
            y_test_true,
            (y_test_pred_prob >= thresh).astype(int),
            zero_division=0,
        )
        for thresh in thresholds
    ]

    # argmax returns the first maximum, i.e. the lowest threshold among ties.
    best_idx = int(np.argmax(f1_scores))
    return thresholds[best_idx], f1_scores[best_idx]


def evaluate_model(model, test_set, batch_size=64):
    """Run `model` over `test_set` on the CPU and collect targets and outputs.

    The model is switched to evaluation mode and moved to the CPU (this
    modifies the passed-in module), then applied batch by batch, in dataset
    order, without gradient tracking.

    Args:
        model: torch module. It is called as `model(X)` for two-element
            batches and as `model(X_temporal, X_static)` for three-element
            batches.
        test_set: dataset whose items are `(X, y)` or
            `(X_temporal, X_static, y)`.
        batch_size: number of samples per batch (default 64).

    Returns:
        (y_test_true, y_test_pred): two flat Python lists with one entry per
        sample, holding the targets and the raw model outputs (no activation
        is applied here).

    Raises:
        ValueError: if a batch has neither 2 nor 3 elements.
    """
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    y_test_true = []
    y_test_pred = []

    model.eval()
    model.to('cpu')
    with torch.no_grad():
        for batch in test_loader:
            # Dispatch on the batch layout explicitly. Wrapping the forward
            # pass in `try/except ValueError` would misread a ValueError
            # raised inside the model as a layout mismatch and hide it.
            n_parts = len(batch)
            if n_parts == 2:
                X, y = batch
                logits = model(X.to('cpu'))
            elif n_parts == 3:
                X_temporal, X_static, y = batch
                logits = model(X_temporal.to('cpu'), X_static.to('cpu'))
            else:
                raise ValueError(
                    f"Expected a batch of 2 or 3 tensors, got {n_parts}"
                )

            # reshape(-1) instead of squeeze(): on a final batch holding a
            # single sample, squeeze() yields a 0-d tensor whose tolist() is
            # a bare number, which list.extend() cannot iterate.
            # Assumes one target and one output value per sample.
            y_test_true.extend(y.reshape(-1).cpu().tolist())
            y_test_pred.extend(logits.reshape(-1).cpu().tolist())

    return y_test_true, y_test_pred

In [None]:
# Root of the mounted MLflow storage. The original absolute path stays as the
# default; set the MLFLOW_STORAGE_PATH environment variable to run the notebook
# on a machine where the storage is mounted elsewhere.
MLFLOW_STORAGE_PATH = os.environ.get(
    "MLFLOW_STORAGE_PATH",
    "/home/basbenja/Facultad/TrabajoFinal/mountpoint_mlflow_storage",
)

# Sub-directories of the storage that hold artifacts and run metadata.
ARTIFACTS_PATH = os.path.join(MLFLOW_STORAGE_PATH, "mlartifacts")
RUNS_PATH = os.path.join(MLFLOW_STORAGE_PATH, "mlruns")

EXPERIMENT_NAME = f"{EXPERIMENT_PREFIX}-{GROUP}-Comp{COMPARISON}"

# The lookup yields None for an unknown name; report that explicitly instead of
# failing later with an AttributeError on `None.experiment_id`.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    raise ValueError(f"MLflow experiment not found: {EXPERIMENT_NAME}")
experiment_id = experiment.experiment_id

experiment_artifacts_path = os.path.join(ARTIFACTS_PATH, experiment_id)
experiment_runs_path      = os.path.join(RUNS_PATH, experiment_id)

# Only the runs of this experiment that were trained with the configured
# architecture are evaluated.
runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
    output_format="list",
    filter_string=f"params.model_arch = '{MODEL_ARCH}'",
)

print(f"{EXPERIMENT_NAME}: ID {experiment_id}")
print(f"Cantidad de runs: {len(runs)}")

In [None]:
# For every selected run: reload its trained model, rebuild one test set per
# cohort (program start period) from the run's simulation file, store the
# per-cohort evaluation files, and finally store the metrics of the positive
# class averaged over cohorts.
for i, run in enumerate(runs):
    run_info = run.info
    run_id = run_info.run_id
    run_name = run_info.run_name
    run_params = run.data.params
    run_artifact_uri = run_info.artifact_uri

    print(f"Run {i+1} / {len(runs)}: {run_name} - ID {run_id}")

    # Directories inside the mounted MLflow storage that are written to
    # directly further down.
    run_artifacts_path = os.path.join(experiment_artifacts_path, run_id, 'artifacts')
    run_params_path    = os.path.join(experiment_runs_path     , run_id, 'params')

    print("    Loading model")
    model_uri = f"runs:/{run_id}/trained_model"
    model = mlflow.pytorch.load_model(model_uri)

    # The simulation the run was trained on is recorded as a run parameter and
    # names the Stata file inside the group directory.
    simulation = run_params['simulation']
    stata_filepath = os.path.join(GROUP_DIR, simulation + ".dta")
    df = pd.read_stata(stata_filepath)

    # Ids of the "nini" individuals that the run logged for its train and test
    # splits, so the same split is reproduced here.
    type3_train_ids_logged = mlflow.artifacts.load_dict(run_artifact_uri + "/ninis_ids_train.json")['ninis_ids_train']
    type3_test_ids_logged  = mlflow.artifacts.load_dict(run_artifact_uri + "/ninis_ids_test.json" )['ninis_ids_test']

    treated_orig = df[df.tratado == 1]
    control_orig = df[df.control == 1]
    nini_train_orig = df[df.id.isin(type3_train_ids_logged)]
    nini_test_orig  = df[df.id.isin(type3_test_ids_logged)]

    # Cohorts are the distinct positive values of `inicio_prog`.
    inicio_prog_values = df.inicio_prog.unique()
    tr_starts = inicio_prog_values[inicio_prog_values > 0]

    # Without cohorts the averages below would divide by zero; skip the run.
    if len(tr_starts) == 0:
        print(f"    No cohorts with inicio_prog > 0 in {stata_filepath}; skipping run")
        continue

    # Running sums of the positive-class metrics; divided by the number of
    # cohorts after the loop.
    cohorts_avg_f1 = 0
    cohort_avg_precision = 0
    cohort_avg_recall = 0

    for tr_start in tr_starts:
        print(f"    Inicio de programa {tr_start}")

        treated_in_cohort = treated_orig[treated_orig.inicio_prog == tr_start]
        control_in_cohort = control_orig[control_orig.inicio_prog == tr_start]

        # Treated and control individuals of this cohort only, plus all the
        # logged ninis (train and test), which are shared by every cohort.
        df_cohort = pd.concat([
            treated_in_cohort,
            control_in_cohort,
            nini_train_orig,
            nini_test_orig,
        ])

        # NOTE(review): judging by how the three frames are used below, type1
        # and type2 look like the train-side and test-side labelled groups and
        # type3 the ninis - confirm against `get_dfs`.
        type1_df, type2_df, type3_df = get_dfs(df_cohort, REQ_PERIODS)

        # Assumes `get_dfs` returns frames indexed by individual id - TODO confirm.
        type3_train_df = type3_df.loc[type3_train_ids_logged]
        type3_test_df  = type3_df.loc[type3_test_ids_logged]

        # The training frame is not used for anything here; it exists only so
        # that `build_datasets` of the DataPreprocessor can be called.
        train_df = pd.concat([type1_df, type3_train_df])
        X_train_df, y_train_df = train_df[FEATS], train_df['target']

        test_df = pd.concat([type2_df, type3_test_df])
        X_test_df, y_test_df = test_df[FEATS], test_df['target']

        data_preprocessor = DataPreprocessor(STAT_FEATS, TEMP_FEATS)
        train_set, test_set = data_preprocessor.build_datasets(
            X_train_df, X_test_df, y_train_df, y_test_df, MODEL_ARCH
        )

        print("    Evaluating model")
        y_test_true, y_test_pred = evaluate_model(model, test_set)

        # Logistic sigmoid of the raw outputs gives probabilities; 0.5 is the
        # decision threshold.
        y_test_pred = np.array(y_test_pred)
        y_test_pred_prob = 1 / (1 + np.exp(-y_test_pred))
        y_test_pred_class = (y_test_pred_prob >= 0.5).astype(int)

        # One sub-directory per cohort inside the run's artifact directory.
        inicio_prog_dir_path = os.path.join(run_artifacts_path, f"inicio_prog_{tr_start}")
        os.makedirs(inicio_prog_dir_path, exist_ok=True)

        report_dict = classification_report(y_test_true, y_test_pred_class, output_dict=True)
        report_path = os.path.join(inicio_prog_dir_path, "classification_report.json")
        with open(report_path, 'w') as f:
            json.dump(report_dict, f)

        fig, ax, confusion_dict = confusion_matrix_plot(y_test_true, y_test_pred_class)
        conf_matrix_path = os.path.join(inicio_prog_dir_path, "confusion_matrix_plot.png")
        fig.savefig(conf_matrix_path)
        # Close the figure so figures do not pile up across cohorts and runs.
        plt.close(fig)

        confusion_dict_path = os.path.join(inicio_prog_dir_path, "confusion_dict.json")
        with open(confusion_dict_path, 'w') as f:
            json.dump(confusion_dict, f)

        # Entry of the positive class in the report. The key is '1.0',
        # presumably because the targets come back as floats; a cohort whose
        # report has no such class would raise KeyError here.
        positive_report = report_dict['1.0']
        cohorts_avg_f1 += positive_report['f1-score']
        cohort_avg_precision += positive_report['precision']
        cohort_avg_recall += positive_report['recall']

    n_cohorts = len(tr_starts)
    cohorts_avg_f1 /= n_cohorts
    cohort_avg_precision /= n_cohorts
    cohort_avg_recall /= n_cohorts

    # NOTE(review): the averages are written as plain text files, one per name,
    # into the run's `params` directory of the storage rather than logged
    # through the MLflow API - presumably so they show up as run parameters;
    # confirm this is intended.
    for k, v in {
        "cohorts_avg_f1": cohorts_avg_f1,
        "cohort_avg_precision": cohort_avg_precision,
        "cohort_avg_recall": cohort_avg_recall
    }.items():
        metric_path = os.path.join(run_params_path, k)
        with open(metric_path, 'w') as f:
            f.write(str(v))