In [1]:
import os, re, json
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

from pathlib import Path
from scipy.stats import pearsonr

# Ids iterated over when loading the per-hospital test results below.
hosp_ids = [73, 264, 420, 243, 338, 443, 199, 458, 300, 188, 252, 167, 122]
# Dash-joined form of the same ids, used in the "train{all_hosps}-test{id}"
# experiment directory names. Derived from `hosp_ids` so the two cannot drift.
all_hosps = "-".join(str(hos) for hos in hosp_ids)

# Model whose logs are read from `log_dir`.
model = "K29"
log_dir = Path(f"../../yaib_logs/eicu/Mortality24/{model}/")
data_dir = Path("../../YAIB-cohorts/data/mortality24/eicu")

In [2]:
# Read `sta.parquet` and `outc.parquet` from the cohort directory configured
# in `data_dir` (instead of repeating the literal path) and join them on
# `stay_id`. `merge` is called without `how`, i.e. pandas' default inner join.
df = pd.read_parquet(data_dir / 'sta.parquet', engine='pyarrow')
df_outc = pd.read_parquet(data_dir / 'outc.parquet', engine='pyarrow')  # add outcome data

df = df.merge(df_outc, on='stay_id')

## K29 Results

Only 1 fold and 1 repetition.
Training on eICU with 31721 samples and validating on eICU with 3525 samples.

`/home/tane/YAIB/yaib_logs/eicu/Mortality24/K29/train73-264-420-243-338-443-199-458-300-188-252-167-122-test300/2025-12-10T06-29-00/repetition_0/fold_0`

In [3]:
# K29 one-to-one
# Collect the 'avg' section of `accumulated_test_metrics.json` for every
# hospital from the `train-test{hos}-n{n}` experiment directories under
# `log_dir`. Result layout: {metric_name: {hospital_id: value}}.
n = 1000
base_avg_dict = {}

for hos in hosp_ids:
    exp_dir = os.path.join(log_dir, f"train-test{hos}-n{n}")
    try:
        # Entries are sorted by name and the last one is used; presumably the
        # run sub-directories are timestamp-named, making this the latest run.
        latest_run = sorted(os.listdir(exp_dir))[-1]
        metrics_path = os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')
        # Context manager so the file handle is closed after parsing
        # (the bare open() used before leaked one handle per hospital).
        with open(metrics_path) as f:
            result_dict = json.load(f)
        for key, value in result_dict['avg'].items():
            base_avg_dict.setdefault(key, {})[hos] = value
    except Exception as e:
        # Best effort: report the experiment that could not be loaded
        # (missing directory, empty directory, bad JSON, ...) and carry on.
        print(exp_dir)
        print(e)
        print()

In [4]:
# K29 all data
# Collect the 'avg' section of `accumulated_test_metrics.json` for every
# hospital from the `train{all_hosps}-test{hos}` experiment directories under
# `log_dir`. Result layout: {metric_name: {hospital_id: value}}.
all_avg_dict = {}

for hos in hosp_ids:
    exp_dir = os.path.join(log_dir, f"train{all_hosps}-test{hos}")
    try:
        # Entries are sorted by name and the last one is used; presumably the
        # run sub-directories are timestamp-named, making this the latest run.
        latest_run = sorted(os.listdir(exp_dir))[-1]
        metrics_path = os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')
        # Context manager so the file handle is closed after parsing
        # (the bare open() used before leaked one handle per hospital).
        with open(metrics_path) as f:
            result_dict = json.load(f)
        for key, value in result_dict['avg'].items():
            all_avg_dict.setdefault(key, {})[hos] = value
    except Exception as e:
        # Best effort: report the experiment that could not be loaded
        # (missing directory, empty directory, bad JSON, ...) and carry on.
        print(exp_dir)
        print(e)
        print()

In [5]:
# Collect the 'avg' section of `accumulated_test_metrics.json` for the
# CatBoostClassifier and LogisticRegressionRBF logs, one entry per hospital,
# from the `train{all_hosps}-test{hos}_id` experiment directories.
# Result layout of each dict: {metric_name: {hospital_id: value}}.
catboost_dict = {}
rbf_dict = {}

# Both models are loaded the same way; only the log directory and the target
# dict differ, so one loop handles them (CatBoost first, then LR w/ RBF).
for model_name, metrics_by_key in (
    ("CatBoostClassifier", catboost_dict),
    ("LogisticRegressionRBF", rbf_dict),
):
    model_log_dir = Path(f"../../yaib_logs/eicu/Mortality24/{model_name}/")
    for hos in hosp_ids:
        exp_dir = os.path.join(model_log_dir, f"train{all_hosps}-test{hos}_id")
        try:
            # Entries are sorted by name and the last one is used; presumably
            # the run sub-directories are timestamp-named, making this the
            # latest run.
            latest_run = sorted(os.listdir(exp_dir))[-1]
            metrics_path = os.path.join(exp_dir, latest_run, 'accumulated_test_metrics.json')
            # Context manager so the file handle is closed after parsing
            # (the bare open() used before leaked one handle per hospital).
            with open(metrics_path) as f:
                result_dict = json.load(f)
            for key, value in result_dict['avg'].items():
                metrics_by_key.setdefault(key, {})[hos] = value
        except Exception as e:
            # Best effort: report the experiment that could not be loaded
            # (missing directory, empty directory, bad JSON, ...) and carry on.
            print(exp_dir)
            print(e)
            print()

In [6]:
# Turn each {metric: {hospital_id: value}} mapping into a DataFrame with one
# column per metric and one row per hospital id.
all_df, base_df, catboost_df, rbf_df = (
    pd.DataFrame.from_dict(metrics)
    for metrics in (all_avg_dict, base_avg_dict, catboost_dict, rbf_dict)
)

In [7]:
# Metric column to compare across the four result tables.
col = 'R2'

# One column per model, rows aligned on the hospital-id index; the frame is
# the cell's last expression so the notebook renders it.
pd.DataFrame(
    {
        label: frame[col]
        for label, frame in (
            ('K29 base', base_df),
            ('K29 all', all_df),
            ('catboost', catboost_df),
            ('LR w/ RBF', rbf_df),
        )
    }
)

Unnamed: 0,K29 base,K29 all,catboost,LR w/ RBF
73,-0.222261,-0.021064,0.046833,-11.515976
264,-0.050929,-0.052807,0.104106,-1.888026
420,0.04553,-0.06171,0.176318,-2.200812
243,-0.054249,-0.043313,0.011545,-2.777152
338,-0.026087,-0.056067,0.178734,-1.977021
443,-0.026434,-0.047379,0.112078,-2.220977
199,-0.045383,-0.058716,0.250101,-1.951709
458,0.115893,-0.006103,0.236153,-1.599
300,-0.034759,-0.024733,0.194505,-2.510895
188,-0.034638,-0.009957,0.214688,-2.252113
