In [1]:
import numpy as np
import pandas as pd
import os

# K-Fold Results

In [30]:
# Aggregate the per-fold metrics into one "mean ± std" summary table.
# Every split<i>/results.txt is read as a headerless, tab-separated file with one
# row per experiment: experiment name, C-index, MTD-AUC.
N_FOLDS = 4
RESULT_COLUMNS = ["Experiment name", "C-index", "MTD-AUC"]

c_index_by_fold = []
mtd_auc_by_fold = []
experiment_names = None
for i in range(N_FOLDS):
    result_file = "split" + str(i) + "/results.txt"
    results_df = pd.read_csv(result_file, sep='\t', header=None, names=RESULT_COLUMNS)

    # Folds are combined row-by-row below, so each file must list the same
    # experiments in the same order; otherwise the averages would silently mix
    # different experiments.
    fold_names = results_df["Experiment name"].to_numpy()
    if experiment_names is None:
        experiment_names = fold_names
    elif not np.array_equal(experiment_names, fold_names):
        raise ValueError(
            "Experiments in " + result_file + " do not match those of split0/results.txt"
        )

    c_index_by_fold.append(results_df["C-index"].to_numpy())
    mtd_auc_by_fold.append(results_df["MTD-AUC"].to_numpy())

# Shape after stacking: (fold, experiment). Statistics are taken across folds.
# np.std is called with its default ddof (0), i.e. the population standard deviation.
c_index = np.stack(c_index_by_fold, axis=0)
avg_c_index = np.mean(c_index, axis=0)
std_c_index = np.std(c_index, axis=0)
str_c_index = ["{:.2f} ± {:.2f}".format(mean, std) for (mean, std) in zip(avg_c_index, std_c_index)]

mtd_auc = np.stack(mtd_auc_by_fold, axis=0)
avg_mtd_auc = np.mean(mtd_auc, axis=0)
std_mtd_auc = np.std(mtd_auc, axis=0)
str_mtd_auc = ["{:.2f} ± {:.2f}".format(mean, std) for (mean, std) in zip(avg_mtd_auc, std_mtd_auc)]

k_fold_results_df = pd.DataFrame(data={"Experiment name": experiment_names, "C-index": str_c_index, "MTD-AUC": str_mtd_auc})
# Experiment names are split on "-" into three parts: model, data, subset.
k_fold_results_df[["Model", "Data", "Subset"]] = k_fold_results_df["Experiment name"].str.split("-", expand=True)
k_fold_results_df = k_fold_results_df.drop(columns='Subset')
# replace() matches whole cell values only, so this shortens the "Model" column
# while the full "Experiment name" strings stay untouched.
k_fold_results_df = k_fold_results_df.replace("CPH_lifelines", "CPH")
display(k_fold_results_df)

Unnamed: 0,Experiment name,C-index,MTD-AUC,Model,Data
0,CPH_lifelines-clinical-all,43.88 ± 7.84,42.78 ± 10.69,CPH,clinical
1,RSF-clinical-all,45.11 ± 7.59,47.41 ± 12.72,RSF,clinical
2,CPH_lifelines-radiomics1-all,59.61 ± 5.91,63.72 ± 7.94,CPH,radiomics1
3,RSF-radiomics1-all,60.63 ± 3.35,64.98 ± 5.32,RSF,radiomics1
4,CPH_lifelines-radiomics2-all,59.80 ± 8.67,64.20 ± 10.00,CPH,radiomics2
5,RSF-radiomics2-all,48.67 ± 5.94,50.74 ± 5.86,RSF,radiomics2
6,CPH_lifelines-radiomics3-all,59.76 ± 9.71,64.81 ± 10.92,CPH,radiomics3
7,RSF-radiomics3-all,49.64 ± 4.12,56.26 ± 5.61,RSF,radiomics3


# Compute Original Train/Test Split Label Distribution

In [18]:
def compute_label_stats(y):
    """Summarise survival labels as (event fraction, mean duration).

    Args:
        y: iterable whose items each unpack into an ``(event, duration)`` pair.
            Every event must compare equal to 0 or 1.

    Returns:
        A tuple ``(event_percent, avg_duration)``: the sum of the event
        indicators divided by the number of items (a fraction, not multiplied
        by 100), and the mean of the durations in whatever unit they are given.

    Raises:
        AssertionError: if an event indicator is neither 0 nor 1.
    """
    event_flags, followup_times = [], []
    for record in y:
        flag, followup = record
        # Only binary event indicators are meaningful for the fraction below.
        assert (flag == 0 or flag == 1)
        event_flags.append(flag)
        followup_times.append(followup)

    n_records = len(event_flags)
    event_percent = np.sum(event_flags) / n_records
    avg_duration = np.mean(followup_times)
    return event_percent, avg_duration

In [23]:
from data_loader import get_pids_split
from results_utils import *
from glob import glob

import pandas as pd
import numpy as np

# Input locations, relative to the notebook's working directory.
data_root = '../data/'
csv_path = '../data/'
# glob returns matches in arbitrary order; sorting makes the load order reproducible.
file_list = sorted(glob('../data/pyradiomics_features/'+'/*'))
radiomics_feature_name_file = '../data/'

# Only the first three of the six returned values are used in this cell.
train_pos, val_pos, test_pos, _, _, _ = get_pids_split(data_root+'pids.txt')

prsndf = pd.read_csv(csv_path+'nlst_15kpct_prsn_062119.csv')
prsndf = prsndf[prsndf['scr_group']== 1] # cancer positive

# Load one feature vector per file; the file name up to the first "." is the pid.
pids_radiomics = []
radiomics = []
for f in file_list:
    # basename() copes with the OS-native path separator, unlike splitting on '/'.
    pids_radiomics.append(int(os.path.basename(f).split('.')[0]))
    # np.load on an .npz archive returns an open NpzFile; close it once the
    # array has been read so file handles are not leaked across the loop.
    with np.load(f) as archive:
        radiomics.append(archive['arr_0'])
radiomics = np.array(radiomics)

# Feature columns are named "0", "1", ... – one per column of the loaded matrix.
n_radiomics_features = np.shape(radiomics)[1]
numericals = [str(i) for i in range(n_radiomics_features)]
df_radiomics = pd.DataFrame(radiomics, columns=numericals, index=pids_radiomics)

# Keep only the persons that have radiomics features and attach those features.
prsndf = prsndf[prsndf['pid'].isin(pids_radiomics)]
prsndf_rad = prsndf.join(df_radiomics, on='pid')
# join() appends the feature columns on the right, so the trailing columns are
# exactly the radiomics features.
raddf = prsndf_rad.iloc[:, -n_radiomics_features:]

#########################################################
print("Clinical only:")

# NOTE(review): parse_clinical is not imported by name in this cell; presumably it
# comes from the wildcard import of results_utils – confirm.
# Train and validation pids are combined for the "train" statistics.
x_train, y_train, df_train = parse_clinical(train_pos+val_pos, prsndf)
x_test, y_test, df_test = parse_clinical(test_pos, prsndf)

train_event_percent, train_average_duration = compute_label_stats(y_train)
test_event_percent, test_average_duration = compute_label_stats(y_test)

print(f"Train death percent: {round(train_event_percent*100, 2)}%")
print(f"Train average duration: {round(train_average_duration, 2)} days")
print(f"Test death percent: {round(test_event_percent*100, 2)}%")
print(f"Test average duration: {round(test_average_duration, 2)} days")

  interactivity=interactivity, compiler=compiler, result=result)


Clinical only:

Radiomics features only:
Train death percent: 29.33%
Train average duration: 1851.78 days
Test death percent: 32.73%
Test average duration: 1885.18 days
