This notebook is used to aggregate experiment results after running `icu_benchmarks`. 

Experiment types include Base (no addition), Add Data (whole hospital data addition), and Add Subgroup (subgroup level data addition). 

For each aggregation, three files are generated:
- `avg`: the average metric values across 5 folds and 5 repetitions
- `std`: the standard deviation for all metric values across 5 folds and 5 repetitions
- `ci`: the confidence intervals (lo, hi) for all metric values across 5 folds and 5 repetitions

In [None]:
import os
import json
import glob
import pickle
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

In [None]:
# SET THESE VALUES
# Root directory holding the experiment logs, and the model whose runs are aggregated.
base_log_dir = Path("../../yaib_logs")
model = "LogisticRegression" # LogisticRegression, LGBMClassifier, or LSTM

hospital_ids = [73, 264, 420, 243, 338, 443, 199, 458, 300, 188, 252, 167]
# Map each hospital id to its position in `hospital_ids`.
hospital_dict = {hos_id: position for position, hos_id in enumerate(hospital_ids)}

# All experiment folders for the chosen model live under this directory.
log_dir = base_log_dir / f'eicu/Mortality24/{model}'

## Base Results

Train on `n` samples from hospital `h` and test on hospital `h` (12 experiments per value of `n`, one per hospital).

In [None]:
# get base results
# For every hospital, read the accumulated test metrics of one run of the
# "train-test{hos}-n{n}" experiment and collect them per metric:
#   avg_dict[metric][hos] / std_dict[metric][hos] -> value from the 'avg' / 'std' section
#   ci_dict[metric][hos]                          -> (lo, hi) from the 'CI_0.95' section
n = 2000

avg_dict = {}
std_dict = {}
ci_dict = {}

for hos in hospital_ids:
    exp_dir = os.path.join(log_dir, f"train-test{hos}-n{n}")
    # Use the entry that sorts last; presumably run folders are timestamp-named,
    # which would make this the most recent run -- TODO confirm.
    latest_run = sorted(os.listdir(exp_dir))[-1]
    # Context manager so the file handle is closed after parsing.
    with open(os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')) as f:
        result_dict = json.load(f)
    for keyword, key_dict in (('avg', avg_dict), ('std', std_dict)):
        for key, value in result_dict[keyword].items():
            key_dict.setdefault(key, {})[hos] = value
    for key, bounds in result_dict['CI_0.95'].items():
        ci_dict.setdefault(key, {})[hos] = (bounds[0], bounds[1])


# Each saved entry is a per-hospital dict, so NumPy stores it as an object array.
np.savez(f"../results/base/{model}-n{n}-avg.npz", **avg_dict)
np.savez(f"../results/base/{model}-n{n}-std.npz", **std_dict)
np.savez(f"../results/base/{model}-n{n}-ci.npz", **ci_dict)

In [None]:
# get base results (no train set cap)
# Same aggregation as the n-capped base results, but reading the
# "train{hos}-test{hos}" experiment folders:
#   avg_dict[metric][hos] / std_dict[metric][hos] -> value from the 'avg' / 'std' section
#   ci_dict[metric][hos]                          -> (lo, hi) from the 'CI_0.95' section

avg_dict = {}
std_dict = {}
ci_dict = {}

for hos in hospital_ids:
    exp_dir = os.path.join(log_dir, f"train{hos}-test{hos}")
    # Use the entry that sorts last; presumably run folders are timestamp-named,
    # which would make this the most recent run -- TODO confirm.
    latest_run = sorted(os.listdir(exp_dir))[-1]
    # Context manager so the file handle is closed after parsing.
    with open(os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')) as f:
        result_dict = json.load(f)
    for keyword, key_dict in (('avg', avg_dict), ('std', std_dict)):
        for key, value in result_dict[keyword].items():
            key_dict.setdefault(key, {})[hos] = value
    for key, bounds in result_dict['CI_0.95'].items():
        ci_dict.setdefault(key, {})[hos] = (bounds[0], bounds[1])


# Each saved entry is a per-hospital dict, so NumPy stores it as an object array.
np.savez(f"../results/base/{model}-avg.npz", **avg_dict)
np.savez(f"../results/base/{model}-std.npz", **std_dict)
np.savez(f"../results/base/{model}-ci.npz", **ci_dict)

## Add Data

Train on n samples from hos i and n samples from hos j, test on hos j (12 test hospitals x 12 train hospitals x 1 model = 144 experiments per n value)

In [None]:
# Aggregate the "add data" experiments into (train hospital x test hospital) matrices:
#   avg_dict[metric][i, j] / std_dict[metric][i, j] -> value from the 'avg' / 'std' section
#   ci_dict[metric] -> (lower-bound matrix, upper-bound matrix) from the 'CI_0.95' section
# Row i is the position of the added (train) hospital, column j that of the test hospital.
n = 1000

avg_dict = {}
std_dict = {}
ci_dict = {}

matrix_shape = (len(hospital_ids), len(hospital_ids))

for i, train_hos in enumerate(hospital_ids):
    for j, test_hos in enumerate(hospital_ids):
        if i != j:
            dir_str = f"train{train_hos}-{test_hos}-test{test_hos}-n{n}"
        else:
            # Diagonal: read the single-hospital run with 2n samples instead.
            dir_str = f"train-test{train_hos}-n{n * 2}"
        exp_dir = os.path.join(log_dir, dir_str)
        # Use the entry that sorts last; presumably run folders are timestamp-named,
        # which would make this the most recent run -- TODO confirm.
        latest_run = sorted(os.listdir(exp_dir))[-1]
        try:
            with open(os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')) as f:
                result_dict = json.load(f)
        except (OSError, json.JSONDecodeError):
            print(f"No accumulated_test_metrics.json file found for {exp_dir}")
            # Skip this pair. Falling through would reuse the previous iteration's
            # result_dict (or raise NameError on the first one). The [i, j] entries
            # keep their initial value of 0.
            continue

        # Per-metric matrices for the average and standard deviation.
        for keyword, key_dict in (('avg', avg_dict), ('std', std_dict)):
            key_results = result_dict[keyword]
            for key in key_results:
                if key not in key_dict:
                    key_dict[key] = np.zeros(matrix_shape)
                key_dict[key][i, j] = key_results[key]

        # Confidence intervals are kept as a pair of matrices: (lower, upper).
        ci = result_dict['CI_0.95']
        for key in ci:
            if key not in ci_dict:
                ci_dict[key] = (np.zeros(matrix_shape), np.zeros(matrix_shape))
            ci_dict[key][0][i, j] = ci[key][0]
            ci_dict[key][1][i, j] = ci[key][1]


np.savez(f"../results/add_data/{model}-n{n}-avg.npz", **avg_dict)
np.savez(f"../results/add_data/{model}-n{n}-std.npz", **std_dict)
np.savez(f"../results/add_data/{model}-n{n}-ci.npz", **ci_dict)

## Add Subgroup

Train on n samples from hos j (randomly sampled from whole hospital) and n samples from hos i (randomly sampled from subgroup only), test on hos j (12 test hospitals x 12 train hospitals x 4 subgroups x 1 model = 576 experiments per n value)

In [None]:
# Aggregate the "add subgroup" experiments (no sample cap). For every subgroup:
#   avg_dict[subgroup][metric][i, j] / std_dict[subgroup][metric][i, j]
#       -> value from the 'avg' / 'std' section
#   ci_dict[subgroup][metric] -> (lower-bound matrix, upper-bound matrix) from 'CI_0.95'
# Row i is the position of the added (train) hospital, column j that of the test hospital.
avg_dict = {}
std_dict = {}
ci_dict = {}

matrix_shape = (len(hospital_ids), len(hospital_ids))

for subgroup in ['black', 'other', 'white']:
    avg_dict[subgroup] = {}
    std_dict[subgroup] = {}
    ci_dict[subgroup] = {}
    for i, train_hos in enumerate(hospital_ids):
        for j, test_hos in enumerate(hospital_ids):
            if i == j:
                # No experiment is read for the diagonal. The matrices come from
                # np.zeros, so these entries are already 0. Assigning
                # avg_dict[subgroup][i, j] here would add a tuple key (i, j) to the
                # per-subgroup dict rather than write into a metric matrix.
                continue

            dir_str = f"train{train_hos}-{test_hos}-test{test_hos}-{subgroup}"
            exp_dir = os.path.join(log_dir, dir_str)
            # Use the entry that sorts last; presumably run folders are timestamp-named,
            # which would make this the most recent run -- TODO confirm.
            latest_run = sorted(os.listdir(exp_dir))[-1]
            # Context manager so the file handle is closed after parsing.
            with open(os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')) as f:
                result_dict = json.load(f)

            # Per-subgroup, per-metric matrices for the average and standard deviation.
            for keyword, key_dict in (('avg', avg_dict), ('std', std_dict)):
                key_results = result_dict[keyword]
                for key in key_results:
                    if key not in key_dict[subgroup]:
                        key_dict[subgroup][key] = np.zeros(matrix_shape)
                    key_dict[subgroup][key][i, j] = key_results[key]

            # Confidence intervals are kept as a pair of matrices: (lower, upper).
            ci = result_dict['CI_0.95']
            for key in ci:
                if key not in ci_dict[subgroup]:
                    ci_dict[subgroup][key] = (np.zeros(matrix_shape), np.zeros(matrix_shape))
                ci_dict[subgroup][key][0][i, j] = ci[key][0]
                ci_dict[subgroup][key][1][i, j] = ci[key][1]


# Each saved entry is a per-subgroup dict, so NumPy stores it as an object array.
np.savez(f"../results/add_subgroup/{model}-avg.npz", **avg_dict)
np.savez(f"../results/add_subgroup/{model}-std.npz", **std_dict)
np.savez(f"../results/add_subgroup/{model}-ci.npz", **ci_dict)

In [None]:
# Aggregate the "add subgroup" experiments capped at n samples. For every subgroup:
#   avg_dict[subgroup][metric][i, j] / std_dict[subgroup][metric][i, j]
#       -> value from the 'avg' / 'std' section
#   ci_dict[subgroup][metric] -> (lower-bound matrix, upper-bound matrix) from 'CI_0.95'
# Row i is the position of the added (train) hospital, column j that of the test hospital.
# Unlike the uncapped variant, the diagonal (i == j) is read from disk as well.
n = 1000

avg_dict = {}
std_dict = {}
ci_dict = {}

matrix_shape = (len(hospital_ids), len(hospital_ids))

for subgroup in ['black', 'other', 'white']:
    avg_dict[subgroup] = {}
    std_dict[subgroup] = {}
    ci_dict[subgroup] = {}
    for i, train_hos in enumerate(hospital_ids):
        for j, test_hos in enumerate(hospital_ids):
            dir_str = f"train{train_hos}-{test_hos}-test{test_hos}-{subgroup}-n{n}"
            exp_dir = os.path.join(log_dir, dir_str)
            # Use the entry that sorts last; presumably run folders are timestamp-named,
            # which would make this the most recent run -- TODO confirm.
            latest_run = sorted(os.listdir(exp_dir))[-1]
            # Context manager so the file handle is closed after parsing.
            with open(os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')) as f:
                result_dict = json.load(f)

            # Per-subgroup, per-metric matrices for the average and standard deviation.
            for keyword, key_dict in (('avg', avg_dict), ('std', std_dict)):
                key_results = result_dict[keyword]
                for key in key_results:
                    if key not in key_dict[subgroup]:
                        key_dict[subgroup][key] = np.zeros(matrix_shape)
                    key_dict[subgroup][key][i, j] = key_results[key]

            # Confidence intervals are kept as a pair of matrices: (lower, upper).
            ci = result_dict['CI_0.95']
            for key in ci:
                if key not in ci_dict[subgroup]:
                    ci_dict[subgroup][key] = (np.zeros(matrix_shape), np.zeros(matrix_shape))
                ci_dict[subgroup][key][0][i, j] = ci[key][0]
                ci_dict[subgroup][key][1][i, j] = ci[key][1]


# Each saved entry is a per-subgroup dict, so NumPy stores it as an object array.
np.savez(f"../results/add_subgroup/{model}-n{n}-avg.npz", **avg_dict)
np.savez(f"../results/add_subgroup/{model}-n{n}-std.npz", **std_dict)
np.savez(f"../results/add_subgroup/{model}-n{n}-ci.npz", **ci_dict)