In [4]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, average_precision_score
import pandas as pd
import json

In [5]:
# The seeds hard-coded below were generated once with: random.sample(range(1, 10000), 5)

In [6]:
# Fixed random seeds (one per training run) so that results are reproducible.
seeds = [
    8479,
    227,
    5413,
    8179,
    7528,
]

In [7]:
# Load the precomputed feature matrices and label arrays from the .npz archive.
# Only the first N_TRAIN_SAMPLES training rows are used; the same constant slices
# both X_train and y_train so features and labels always stay aligned.
N_TRAIN_SAMPLES = 40000

data = np.load('./data/data_scaled.npz')
X_train_scaled = data['X_train'][:N_TRAIN_SAMPLES]
X_test_scaled = data['X_test']
y_train = data['y_train'][:N_TRAIN_SAMPLES]
y_test = data['y_test']
# Labels for the two fine-tuning sets (not used by the baseline cells shown here).
y_finetuning1 = data['y_finetuning1']
y_finetuning2 = data['y_finetuning2']

In [None]:
# Number of tasks (label columns) evaluated: columns 0 .. N_TASKS-1 of y_train / y_test.
N_TASKS = 9

# Per-run metric vectors: each appended entry is an array with one value per task.
all_results = {
    "Delta-AUC-PR": [],
    "ROC-AUC": []
}
# Per-run metrics averaged over all tasks: each appended entry is one scalar.
avg_results = {
    "Delta-AUC-PR": [],
    "ROC-AUC": []
}

# Train one random forest per task for every seed, so that the spread of the
# metrics across differently seeded runs can be reported afterwards.
for seed in seeds:
    results = []
    for i in range(N_TASKS):
        # Drop training samples whose label for task i is NaN.
        mask_valid_train = ~np.isnan(y_train[:, i])
        X_train_scaled_valid = X_train_scaled[mask_valid_train]
        y_train_i = y_train[:, i][mask_valid_train]

        # Drop test samples whose label for task i is NaN.
        mask_valid_test = ~np.isnan(y_test[:, i])
        X_test_scaled_valid = X_test_scaled[mask_valid_test]
        y_test_i = y_test[:, i][mask_valid_test]

        # Bookkeeping: how many samples remain (are actually used) after the NaN filter.
        used_train_count = len(y_train_i)
        used_train_pct = round(used_train_count / len(y_train) * 100, 2)
        used_test_count = len(y_test_i)
        used_test_pct = round(used_test_count / len(y_test) * 100, 2)

        # Fraction of positive labels in the training set (how rare label 1 is vs. label 0).
        percent_pos_train = np.count_nonzero(y_train_i == 1) / len(y_train_i)

        # Fit the random forest for this task with the seed of the current run.
        rf = RandomForestClassifier(random_state=seed)
        rf.fit(X_train_scaled_valid, y_train_i)

        # Predicted probability of the positive class and the resulting metrics.
        y_pred_proba = rf.predict_proba(X_test_scaled_valid)[:, 1]
        auc = roc_auc_score(y_test_i, y_pred_proba)
        # NOTE(review): the baseline subtracted here is the positive rate of the
        # *training* labels; the chance level of average precision is usually taken
        # as the positive rate of the evaluated (test) labels - confirm this is intended.
        delta_auc_pr = average_precision_score(y_test_i, y_pred_proba) - percent_pos_train

        # Relative gain over the baseline, (AP - p) / p: > 0 is better than the
        # baseline, < 0 is worse, 0 is equal to it.
        factor = round(delta_auc_pr / percent_pos_train, 2)

        # One record per task. Metrics are rounded here, so every aggregate computed
        # from df_results below is based on the rounded values.
        results.append({
            "Task": i + 1,
            "used train (#)": used_train_count,
            "used train (%)": used_train_pct,
            "used test (#)": used_test_count,
            "used test (%)": used_test_pct,
            "Pos. Labels/Train": round(percent_pos_train, 6),
            "ROC-AUC": round(auc, 4),
            "Delta-AUC-PR": round(delta_auc_pr, 6),
            "Factor Better/Worse": factor
        })

    # Build the per-task table once per run, after all tasks have been evaluated.
    df_results = pd.DataFrame(results)

    # Collect the per-task metric vectors of this run.
    all_results["Delta-AUC-PR"].append(np.array(df_results["Delta-AUC-PR"]))
    all_results["ROC-AUC"].append(np.array(df_results["ROC-AUC"]))

    # Collect the metrics of this run averaged over all tasks.
    mean_auc_pr = np.mean(np.array(df_results["Delta-AUC-PR"]))
    mean_roc_auc = np.mean(np.array(df_results["ROC-AUC"]))
    avg_results["Delta-AUC-PR"].append(mean_auc_pr)
    avg_results["ROC-AUC"].append(mean_roc_auc)

In [None]:
# Summarise the task-averaged metrics across runs: mean and standard deviation.
run_means_auc_pr = np.array(avg_results["Delta-AUC-PR"])
run_means_roc_auc = np.array(avg_results["ROC-AUC"])

final_auc_pr = run_means_auc_pr.mean()
final_roc_auc = run_means_roc_auc.mean()
final_sd_auc_pr = run_means_auc_pr.std()
final_sd_roc_auc = run_means_roc_auc.std()

# Single-record list holding the summary, in a JSON-serialisable layout.
final_results = [
    {
        "Delta-AUC-PR": final_auc_pr,
        "ROC-AUC": final_roc_auc,
        "Sd-Delta-AUC-PR": final_sd_auc_pr,
        "Sd-ROC-AUC": final_sd_roc_auc,
    }
]

# Persist the summary to disk.
with open("./metrics/metrics_baseline.json", "w") as out_file:
    out_file.write(json.dumps(final_results, indent=4))

In [None]:
# Rebuild all_results with every NumPy array replaced by a plain Python list;
# entries that are not arrays are kept unchanged.
serializable_results = {}
for metric_name, per_run_values in all_results.items():
    serializable_results[metric_name] = [
        run_values.tolist() if isinstance(run_values, np.ndarray) else run_values
        for run_values in per_run_values
    ]
all_results = serializable_results

In [None]:
# Stack the per-run metric vectors into (run, task) matrices and aggregate over
# the runs (axis 0), which keeps one mean / standard deviation per task.
delta_auc_pr_runs = np.array(all_results["Delta-AUC-PR"])
roc_auc_runs = np.array(all_results["ROC-AUC"])

avg_auc_pr_per_task = delta_auc_pr_runs.mean(axis=0)
sd_auc_pr_per_task = delta_auc_pr_runs.std(axis=0)
avg_roc_auc_per_task = roc_auc_runs.mean(axis=0)
sd_roc_auc_per_task = roc_auc_runs.std(axis=0)

# Single-record list holding the per-task summary, in a JSON-serialisable layout.
final_results_per_task = [
    {
        "Delta-AUC-PR per task": list(avg_auc_pr_per_task),
        "ROC-AUC per task": list(avg_roc_auc_per_task),
        "Sd-Delta-AUC-PR per task": list(sd_auc_pr_per_task),
        "Sd-ROC-AUC per task": list(sd_roc_auc_per_task),
    }
]

# Persist the per-task summary to disk.
with open("./metrics/metrics_baseline_per_task.json", "w") as out_file:
    out_file.write(json.dumps(final_results_per_task, indent=4))

In [None]:
# Aggregate over the tasks (axis 1) of the (run, task) metric matrices, which keeps
# one value per run: the mean over tasks and the standard deviation across tasks.
avg_auc_pr_per_run = np.mean(np.array(all_results["Delta-AUC-PR"]), axis=1)
sd_auc_pr_per_run = np.std(np.array(all_results["Delta-AUC-PR"]), axis=1)

avg_roc_auc_per_run = np.mean(np.array(all_results["ROC-AUC"]), axis=1)
sd_roc_auc_per_run = np.std(np.array(all_results["ROC-AUC"]), axis=1)

# Single-record list holding the per-run summary, in a JSON-serialisable layout.
# Stored under its own name so the per-task summary is not overwritten.
final_results_per_run = [{
    "Delta-AUC-PR per run": list(avg_auc_pr_per_run),
    "ROC-AUC per run": list(avg_roc_auc_per_run),
    "Sd-Delta-AUC-PR per run": list(sd_auc_pr_per_run),
    "Sd-ROC-AUC per run": list(sd_roc_auc_per_run)
}]

# Persist the per-run summary to disk.
with open("./metrics/metrics_baseline_per_run.json", "w") as f:
    json.dump(final_results_per_run, f, indent=4)