In [None]:
import importlib
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from iblm import IBLMClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score
from tqdm import tqdm

# Add the parent directory to the import path. Presumably this is what lets the
# `models.<dataset>.<name>` modules imported by code_model_evaluation resolve
# (the model files are read from "../models/...") -- TODO confirm.
sys.path.append("..")

## evaluation

In [None]:
def code_model_evaluation(model_file: str, test_df: pd.DataFrame, target_colname: str) -> dict:
    """Import one generated code model and score its predictions on a test set.

    The source at ``model_file`` is read so it can be stored in the report, and
    the model is imported as ``models.<parent directory name>.<file stem>``; the
    directory that contains the ``models`` package must therefore be importable.

    Args:
        model_file: Path to the model's ``.py`` file.
        test_df: Test data, including the target column.
        target_colname: Name of the label column in ``test_df``.

    Returns:
        A dict with the keys ``model_name``, ``status``, ``comment``, ``acc``,
        ``auc`` and ``code``. ``status`` is ``"SUCCEEDED"`` or ``"FAILED"`` and
        ``comment`` is one of ``"0_succeeded"``, ``"1_import_error"``,
        ``"2_predict_method_error"`` or ``"3_invalid_pred_value"``. ``acc`` and
        ``auc`` are ``None`` unless the evaluation succeeded.

    Raises:
        OSError: If ``model_file`` cannot be opened for reading.
    """
    model_file = Path(model_file)
    model_name = model_file.stem
    with open(model_file, "r") as fp:
        code = fp.read()

    def build_result(status: str, comment: str, acc=None, auc=None) -> dict:
        # Single place that fixes the key set and key order of every report row.
        return dict(
            model_name=model_name,
            status=status,
            comment=comment,
            acc=acc,
            auc=auc,
            code=code,
        )

    try:
        # import_module replaces exec() of a string assembled from the file name.
        codemodel = importlib.import_module(f"models.{model_file.parent.name}.{model_name}")
    except (Exception, SystemExit):
        # SystemExit is still recorded as an import failure (a model file may
        # call sys.exit() at import time), but KeyboardInterrupt now propagates
        # so a long evaluation run can be interrupted.
        return build_result("FAILED", "1_import_error")

    try:
        x_test = test_df.drop(target_colname, axis=1)
        y_test = test_df[target_colname]
        y_proba = codemodel.predict(x_test)
        y_pred = (y_proba > 0.5).astype(int)
        # Predictions are treated as probabilities: anything outside [0, 1] is invalid.
        if np.any(y_proba < 0) or np.any(y_proba > 1):
            return build_result("FAILED", "3_invalid_pred_value")
        roc_auc = roc_auc_score(y_test, y_proba)
        accuracy = round(accuracy_score(y_test, y_pred), 4)
    except Exception:
        # No `finally: return ...` here: a return inside finally would discard
        # any exception that is not an Exception subclass (e.g. KeyboardInterrupt).
        return build_result("FAILED", "2_predict_method_error")

    return build_result("SUCCEEDED", "0_succeeded", acc=accuracy, auc=roc_auc)

In [None]:
def _evaluate_trials(seeds, train_nums, target_colname, test_csv_for, model_file_for, n_trials=30):
    """Evaluate trials 1..n_trials for every (seed, train_num) pair.

    Args:
        seeds: Seeds to iterate over; the test CSV is (re)read once per seed.
        train_nums: Training-set sizes to iterate over.
        target_colname: Name of the label column in the test CSV.
        test_csv_for: Callable ``seed -> path`` of the test CSV.
        model_file_for: Callable ``(seed, train_num, trial) -> path`` of the model file.
        n_trials: Number of trials per pair; trials are numbered from 1.

    Returns:
        One row per evaluated model: ``seed``, ``train_num`` and the fields
        returned by ``code_model_evaluation``.
    """
    results = []
    for seed in tqdm(seeds):
        test_df = pd.read_csv(test_csv_for(seed))
        for train_num in tqdm(train_nums, leave=False):
            for trial in range(1, n_trials + 1):
                results.append(
                    dict(
                        seed=seed,
                        train_num=train_num,
                        **code_model_evaluation(
                            model_file_for(seed, train_num, trial), test_df, target_colname
                        ),
                    )
                )
    print("Done!")
    return pd.DataFrame(results)


def evaluate_all_code_models(
    model_name: str, seeds: list, train_nums: list, target_colname: str, n_trials: int = 30
) -> pd.DataFrame:
    """Evaluate the code models whose files are named per seed and train size.

    Used for the following models:
    * moon
    * pseudodata
    * titanic

    Test data is read from ``../data/<model_name>/<model_name>_<seed>_300_test.csv``
    and models from ``../models/<model_name>/<model_name>_<seed>_<train_num>_<trial>.py``.

    Args:
        model_name: Dataset name used in the data and model paths.
        seeds: Seeds to evaluate.
        train_nums: Training-set sizes to evaluate.
        target_colname: Name of the label column in the test CSV.
        n_trials: Number of trials per (seed, train_num) pair (default 30).

    Returns:
        One row per evaluated model file.
    """
    return _evaluate_trials(
        seeds,
        train_nums,
        target_colname,
        test_csv_for=lambda seed: f"../data/{model_name}/{model_name}_{seed}_300_test.csv",
        model_file_for=lambda seed, train_num, trial: (
            f"../models/{model_name}/{model_name}_{seed}_{train_num}_{trial}.py"
        ),
        n_trials=n_trials,
    )


def evaluate_all_code_models_circle(
    model_name: str, seeds: list, train_nums: list, target_colname: str, n_trials: int = 30
) -> pd.DataFrame:
    """Evaluate the code models whose files are named by trial only (300-row test set).

    Used for the following models:
    * circle

    Test data is read from ``../data/<model_name>/<model_name>_300_test.csv`` and
    models from ``../models/<model_name>/<model_name>_<trial>.py``. ``seeds`` and
    ``train_nums`` do not appear in any path; they only label the result rows, so
    every (seed, train_num) pair evaluates the same model files.

    Args:
        model_name: Dataset name used in the data and model paths.
        seeds: Labels for the ``seed`` column.
        train_nums: Labels for the ``train_num`` column.
        target_colname: Name of the label column in the test CSV.
        n_trials: Number of trials per (seed, train_num) pair (default 30).

    Returns:
        One row per evaluated model file.
    """
    return _evaluate_trials(
        seeds,
        train_nums,
        target_colname,
        test_csv_for=lambda seed: f"../data/{model_name}/{model_name}_300_test.csv",
        model_file_for=lambda seed, train_num, trial: f"../models/{model_name}/{model_name}_{trial}.py",
        n_trials=n_trials,
    )


def evaluate_all_code_models_text(
    model_name: str, seeds: list, train_nums: list, target_colname: str, n_trials: int = 30
) -> pd.DataFrame:
    """Evaluate the code models whose files are named by trial only (25-row test set).

    Used for the following models:
    * text

    Test data is read from ``../data/<model_name>/<model_name>_25_test.csv`` and
    models from ``../models/<model_name>/<model_name>_<trial>.py``. ``seeds`` and
    ``train_nums`` do not appear in any path; they only label the result rows, so
    every (seed, train_num) pair evaluates the same model files.

    Args:
        model_name: Dataset name used in the data and model paths.
        seeds: Labels for the ``seed`` column.
        train_nums: Labels for the ``train_num`` column.
        target_colname: Name of the label column in the test CSV.
        n_trials: Number of trials per (seed, train_num) pair (default 30).

    Returns:
        One row per evaluated model file.
    """
    return _evaluate_trials(
        seeds,
        train_nums,
        target_colname,
        test_csv_for=lambda seed: f"../data/{model_name}/{model_name}_25_test.csv",
        model_file_for=lambda seed, train_num, trial: f"../models/{model_name}/{model_name}_{trial}.py",
        n_trials=n_trials,
    )

## dataframe processing

In [None]:
def metrics_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate accuracy and AUC per (seed, train_num) group.

    Args:
        df: Raw evaluation results with the columns ``seed``, ``train_num``,
            ``status``, ``acc`` and ``auc``. Missing ``acc``/``auc`` values are
            ignored by the aggregations.

    Returns:
        One row per (seed, train_num) with the columns ``seed``, ``train_num``,
        ``n_trials`` (rows in the group), ``n_succeeses`` (rows with a non-missing
        ``acc``; the spelling is kept because it is the written column name),
        ``acc_mean``, ``acc_min``, ``acc_max``, ``auc_mean``, ``auc_min`` and
        ``auc_max``.
    """
    # When every trial failed, acc/auc arrive as all-None object columns and a
    # numeric aggregation such as mean can raise; coerce them to float NaN first.
    numeric_df = df.assign(
        acc=pd.to_numeric(df["acc"], errors="coerce"),
        auc=pd.to_numeric(df["auc"], errors="coerce"),
    )
    summary_df = numeric_df.groupby(["seed", "train_num"], as_index=False).agg(
        dict(
            status=["count"],
            acc=["count", "mean", "min", "max"],
            auc=["mean", "min", "max"],
        )
    )

    # Flatten the two-level columns: ("acc", "mean") -> "acc_mean", ("seed", "") -> "seed".
    summary_df.columns = ["_".join(x).strip("_") for x in summary_df.columns]
    return summary_df.rename(columns=dict(status_count="n_trials", acc_count="n_succeeses"))


def code_model_execution_trial_count_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Count how many trials ended with each ``comment`` value, over the whole frame.

    Args:
        df: Raw evaluation results with a ``comment`` column.

    Returns:
        A single-row frame: ``n_trials`` (the sum of all counts) followed by one
        column per distinct comment, in sorted order.
    """
    per_comment = df["comment"].value_counts()
    ordered_comments = sorted(df["comment"].unique())

    # Turn the counts into one wide row with the comment columns in sorted order.
    wide = per_comment.to_frame().T.loc[:, ordered_comments].reset_index(drop=True)

    total = wide.sum(axis=1).values[0]
    wide.insert(0, "n_trials", total)
    return wide


def code_model_execution_trial_count_each_case(df: pd.DataFrame) -> pd.DataFrame:
    """Count trials per ``comment`` value for every (seed, train_num) pair.

    Args:
        df: Raw evaluation results with the columns ``seed``, ``train_num``,
            ``comment`` and ``model_name``.

    Returns:
        A frame indexed by (``seed``, ``train_num``) whose first column,
        ``n_trials``, is that row's total and whose remaining columns hold the
        count of each distinct comment, in sorted order. Combinations that never
        occurred are reported as 0.
    """
    ordered_comments = sorted(df.comment.unique())
    counts = (
        df.groupby(["seed", "train_num", "comment"], as_index=False)
        .agg(dict(model_name="count"))
        .rename(columns=dict(model_name="n_events"))
        .pivot(index=["seed", "train_num"], columns="comment", values="n_events")
        .fillna(0)
        .astype(int)
        .loc[:, ordered_comments]
    )
    # Row-wise totals: each (seed, train_num) gets its own trial count rather
    # than the first row's total being copied to every row.
    counts.insert(0, "n_trials", counts.sum(axis=1))
    return counts

## save dataframe

In [None]:
def save_raw_results(df: pd.DataFrame, output_dir: str) -> None:
    """Write the raw results to ``<output_dir>/raw_results.tsv``.

    Rows are ordered by ``seed`` and ``train_num`` ascending, then ``auc``
    descending. The caller's frame is not modified and the index is not written.
    """
    ordered = df.sort_values(by=["seed", "train_num", "auc"], ascending=[True, True, False])
    destination = f"{output_dir}/raw_results.tsv"
    ordered.to_csv(destination, sep="\t", index=False)


def save_metrics_summary(df: pd.DataFrame, output_dir: str) -> None:
    """Write ``df`` to ``<output_dir>/metrics_summary.tsv``, tab-separated, without the index."""
    destination = f"{output_dir}/metrics_summary.tsv"
    df.to_csv(destination, index=False, sep="\t")


def save_code_model_execution_trial_count_summary(df: pd.DataFrame, output_dir: str) -> None:
    """Write ``df`` to ``<output_dir>/code_model_execution_trial_count_summary.tsv``.

    The file is tab-separated and the index is not written.
    """
    destination = f"{output_dir}/code_model_execution_trial_count_summary.tsv"
    df.to_csv(destination, index=False, sep="\t")


def save_code_model_execution_trial_count_each_case(df: pd.DataFrame, output_dir: str) -> None:
    """Write ``df`` to ``<output_dir>/code_model_execution_trial_count_each_case.tsv``.

    The file is tab-separated. A named index (such as the ``seed``/``train_num``
    index of the per-case count table) is written as leading columns so each row
    stays identifiable; an unnamed index is omitted, as before.
    """
    # Writing with index=False unconditionally would drop the (seed, train_num)
    # keys that the pivoted per-case table carries in its index.
    has_named_index = any(name is not None for name in df.index.names)
    df.to_csv(
        f"{output_dir}/code_model_execution_trial_count_each_case.tsv",
        sep="\t",
        index=has_named_index,
    )

## main

### pseudodata

In [None]:
# Configuration for the "pseudodata" code models: seeds and training-set sizes
# to evaluate, and the label column of the test CSV.
model_name = "pseudodata"
seeds = [3655, 3656, 3657]
train_nums = [10, 20, 30, 40, 50, 100, 200, 300]
target_colname = "target"

# Reports go to ../data/code_model_evaluation/<model_name>; create it if missing.
output_dir = Path(f"../data/code_model_evaluation/{model_name}")
output_dir.mkdir(exist_ok=True, parents=True)

# One result row per (seed, train_num, trial) model file.
df = evaluate_all_code_models(model_name, seeds, train_nums, target_colname)

# Derive the three summary tables from the raw per-model results.
metrics_summary_df = metrics_summary(df)
code_model_execution_trial_count_summary_df = code_model_execution_trial_count_summary(df)
code_model_execution_trial_count_each_case_df = code_model_execution_trial_count_each_case(df)

# Write the raw results and the three summaries as TSV files under output_dir.
save_raw_results(df, output_dir)
save_metrics_summary(metrics_summary_df, output_dir)
save_code_model_execution_trial_count_summary(code_model_execution_trial_count_summary_df, output_dir)
save_code_model_execution_trial_count_each_case(code_model_execution_trial_count_each_case_df, output_dir)

### moon

In [None]:
# Configuration for the "moon" code models: seeds and training-set sizes to
# evaluate, and the label column of the test CSV.
model_name = "moon"
seeds = [3655, 3656, 3657]
train_nums = [10, 20, 30, 40, 50, 100, 200, 300]
target_colname = "target"

# Reports go to ../data/code_model_evaluation/<model_name>; create it if missing.
output_dir = Path(f"../data/code_model_evaluation/{model_name}")
output_dir.mkdir(exist_ok=True, parents=True)

# One result row per (seed, train_num, trial) model file.
df = evaluate_all_code_models(model_name, seeds, train_nums, target_colname)

# Derive the three summary tables from the raw per-model results.
metrics_summary_df = metrics_summary(df)
code_model_execution_trial_count_summary_df = code_model_execution_trial_count_summary(df)
code_model_execution_trial_count_each_case_df = code_model_execution_trial_count_each_case(df)

# Write the raw results and the three summaries as TSV files under output_dir.
save_raw_results(df, output_dir)
save_metrics_summary(metrics_summary_df, output_dir)
save_code_model_execution_trial_count_summary(code_model_execution_trial_count_summary_df, output_dir)
save_code_model_execution_trial_count_each_case(code_model_execution_trial_count_each_case_df, output_dir)

### titanic

In [None]:
# Configuration for the "titanic" code models. Unlike the other datasets in
# this notebook, the label column is "survived" and the training sizes stop at 50.
model_name = "titanic"
seeds = [3655, 3656, 3657]
train_nums = [6, 8, 10, 20, 30, 40, 50]
target_colname = "survived"

# Reports go to ../data/code_model_evaluation/<model_name>; create it if missing.
output_dir = Path(f"../data/code_model_evaluation/{model_name}")
output_dir.mkdir(exist_ok=True, parents=True)

# One result row per (seed, train_num, trial) model file.
df = evaluate_all_code_models(model_name, seeds, train_nums, target_colname)

# Derive the three summary tables from the raw per-model results.
metrics_summary_df = metrics_summary(df)
code_model_execution_trial_count_summary_df = code_model_execution_trial_count_summary(df)
code_model_execution_trial_count_each_case_df = code_model_execution_trial_count_each_case(df)

# Write the raw results and the three summaries as TSV files under output_dir.
save_raw_results(df, output_dir)
save_metrics_summary(metrics_summary_df, output_dir)
save_code_model_execution_trial_count_summary(code_model_execution_trial_count_summary_df, output_dir)
save_code_model_execution_trial_count_each_case(code_model_execution_trial_count_each_case_df, output_dir)

### circle

In [None]:
# Configuration for the "circle" code models.
# evaluate_all_code_models_circle does not use seed or train_num in any file
# path, so a single empty-string placeholder is passed for each; they only
# label the `seed` and `train_num` columns of the results.
model_name = "circle"
seeds = [""]
train_nums = [""]
target_colname = "Target"

# Reports go to ../data/code_model_evaluation/<model_name>; create it if missing.
output_dir = Path(f"../data/code_model_evaluation/{model_name}")
output_dir.mkdir(exist_ok=True, parents=True)

# One result row per trial model file.
df = evaluate_all_code_models_circle(model_name, seeds, train_nums, target_colname)

# Derive the three summary tables from the raw per-model results.
metrics_summary_df = metrics_summary(df)
code_model_execution_trial_count_summary_df = code_model_execution_trial_count_summary(df)
code_model_execution_trial_count_each_case_df = code_model_execution_trial_count_each_case(df)

# Write the raw results and the three summaries as TSV files under output_dir.
save_raw_results(df, output_dir)
save_metrics_summary(metrics_summary_df, output_dir)
save_code_model_execution_trial_count_summary(code_model_execution_trial_count_summary_df, output_dir)
save_code_model_execution_trial_count_each_case(code_model_execution_trial_count_each_case_df, output_dir)

### text

In [None]:
# Configuration for the "text" code models.
# evaluate_all_code_models_text does not use seed or train_num in any file
# path, so a single empty-string placeholder is passed for each; they only
# label the `seed` and `train_num` columns of the results.
model_name = "text"
seeds = [""]
train_nums = [""]
target_colname = "Target"

# Reports go to ../data/code_model_evaluation/<model_name>; create it if missing.
output_dir = Path(f"../data/code_model_evaluation/{model_name}")
output_dir.mkdir(exist_ok=True, parents=True)

# One result row per trial model file.
df = evaluate_all_code_models_text(model_name, seeds, train_nums, target_colname)

# Derive the three summary tables from the raw per-model results.
metrics_summary_df = metrics_summary(df)
code_model_execution_trial_count_summary_df = code_model_execution_trial_count_summary(df)
code_model_execution_trial_count_each_case_df = code_model_execution_trial_count_each_case(df)

# Write the raw results and the three summaries as TSV files under output_dir.
save_raw_results(df, output_dir)
save_metrics_summary(metrics_summary_df, output_dir)
save_code_model_execution_trial_count_summary(code_model_execution_trial_count_summary_df, output_dir)
save_code_model_execution_trial_count_each_case(code_model_execution_trial_count_each_case_df, output_dir)