In [2]:
import os
import numpy as np
import yaml

def load_config(config_path):
    """Read a YAML configuration file and return its parsed contents.

    Args:
        config_path: Path of the YAML file to open (opened in text read mode).

    Returns:
        Whatever ``yaml.safe_load`` produces for the file's contents.
    """
    with open(config_path, 'r') as handle:
        return yaml.safe_load(handle)

def load_and_prepare_data(sec, data, name, dataset, dtw_version, config):
    """Load the hypo / non-hypo arrays for one configuration and scale them to [0, 1].

    File locations are built from ``config['data_root']`` and the path
    templates under ``config['paths']``. The ``'uncleaned'`` entry is used when
    ``name == 'uncleaned'`` and the ``'others'`` entry otherwise.

    Args:
        sec: Input-length tag substituted into the file templates; ``'30s'``
            additionally disables index-based selection of noise rows.
        data: Data-type tag substituted into the file templates; ``'Noise'``
            appends rows from the auxiliary noise files.
        name: Model / variant name; ``'uncleaned'`` selects the un-normalised
            files, which are rescaled here.
        dataset: Dataset tag substituted into the templates; ``'vitaldb'``
            selects a different rescaling for the un-normalised files.
        dtw_version: ``'without_DTW'`` selects the ``_withoutDTW`` directory
            suffix and an empty file prefix; any other value selects the
            ``DTW_`` file prefix. Also used as a key into the noise patterns.
        config: Parsed configuration mapping.

    Returns:
        ``(All_hypo_in, All_nonhypo_in, hypo_out, nonhypo_out)`` where the
        inputs are clipped to ``[0.0, 1.0]`` and the outputs are all-ones /
        all-zeros label vectors with one entry per input row.
    """
    data_root = config['data_root']
    paths = config['paths']

    is_uncleaned = name == 'uncleaned'
    skip_dtw = dtw_version == 'without_DTW'
    dtw_suffix = '_withoutDTW' if skip_dtw else ''
    dtw_prefix = '' if skip_dtw else 'DTW_'

    # Every lookup below goes through the same sub-tree of the path config.
    group = paths['uncleaned'] if is_uncleaned else paths['others']

    # --- primary files -------------------------------------------------------
    if is_uncleaned:
        base_dir = group['base_path'].format(data_root=data_root, dataset=dataset)
        fields = dict(data=data, sec=sec)
    else:
        base_dir = group['base_path'].format(data_root=data_root, dataset=dataset, dtw_suffix=dtw_suffix)
        fields = dict(name=name, data=data, sec=sec, dtw_prefix=dtw_prefix)

    hypo_file = os.path.join(base_dir, group['file_pattern']['hypo'].format(**fields))
    nonhypo_file = os.path.join(base_dir, group['file_pattern']['nonhypo'].format(**fields))

    # Only the first 500 rows of each file are kept.
    hypo_in = np.load(hypo_file)[:500]
    nonhypo_in = np.load(nonhypo_file)[:500]

    # Only the 'uncleaned' files are rescaled; the others are used as stored.
    if is_uncleaned:
        if dataset == 'vitaldb':
            hypo_in = hypo_in * 180.0 / 200.0
            nonhypo_in = nonhypo_in * 180.0 / 200.0
        else:
            hypo_in = (hypo_in - 20.0) / 200.0
            nonhypo_in = (nonhypo_in - 20.0) / 200.0

    # --- optional noise rows -------------------------------------------------
    if data != 'Noise':
        All_hypo_in, All_nonhypo_in = hypo_in, nonhypo_in
    else:
        noise_cfg = group['noise']
        noise_dir = noise_cfg['base_path'].format(data_root=data_root, dataset=dataset)
        if is_uncleaned:
            noise_patterns = noise_cfg['file_pattern']
            noise_fields = dict(sec=sec)
        else:
            # Cleaned noise files are additionally keyed by the DTW version.
            noise_patterns = noise_cfg['file_pattern'][dtw_version]
            noise_fields = dict(name=name, sec=sec, dtw_prefix=dtw_prefix)

        noise_hypo_file = os.path.join(noise_dir, noise_patterns['hypo'].format(**noise_fields))
        noise_nonhypo_file = os.path.join(noise_dir, noise_patterns['nonhypo'].format(**noise_fields))

        noise_hypo = np.load(noise_hypo_file)
        noise_nonhypo = np.load(noise_nonhypo_file)

        if is_uncleaned:
            noise_hypo = (noise_hypo - 20.0) / 200.0
            noise_nonhypo = (noise_nonhypo - 20.0) / 200.0

        if sec == '30s':
            # All noise rows are used for the 30 s setting.
            picked_hypo, picked_nonhypo = noise_hypo, noise_nonhypo
        else:
            # Other settings pick rows through stored index arrays.
            idx_cfg = group['idx']
            idx_dir = idx_cfg['base_path'].format(data_root=data_root, dataset=dataset)
            hypo_idx_file = os.path.join(
                idx_dir, idx_cfg['file_pattern']['hypo'].format(ids=idx_cfg['ids'][dataset], sec=sec))
            nonhypo_idx_file = os.path.join(
                idx_dir, idx_cfg['file_pattern']['nonhypo'].format(ids=idx_cfg['ids'][dataset], sec=sec))

            hypo_idx = np.load(hypo_idx_file)
            nonhypo_idx = np.load(nonhypo_idx_file)

            picked_hypo = noise_hypo[hypo_idx]
            picked_nonhypo = noise_nonhypo[nonhypo_idx]

        All_hypo_in = np.concatenate((hypo_in, picked_hypo), axis=0)
        All_nonhypo_in = np.concatenate((nonhypo_in, picked_nonhypo), axis=0)

    # Labels: 1 for every hypo row, 0 for every non-hypo row.
    hypo_out = np.ones(All_hypo_in.shape[0])
    nonhypo_out = np.zeros(All_nonhypo_in.shape[0])

    return (
        np.clip(All_hypo_in, 0.0, 1.0),
        np.clip(All_nonhypo_in, 0.0, 1.0),
        hypo_out,
        nonhypo_out,
    )

def shuffle_and_combine_data(All_hypo_in, All_nonhypo_in, hypo_out, nonhypo_out):
    """Merge both classes into one randomly ordered set of (input, label) rows.

    Each input row is paired with its label before shuffling, so the pairing
    is preserved. Ordering comes from ``np.random.shuffle`` and therefore
    depends on NumPy's global random state.

    Args:
        All_hypo_in: Input rows of the first class.
        All_nonhypo_in: Input rows of the second class.
        hypo_out: Labels matching ``All_hypo_in`` row by row.
        nonhypo_out: Labels matching ``All_nonhypo_in`` row by row.

    Returns:
        ``(shuffled_in, shuffled_out)`` as NumPy arrays in the shuffled order.
    """
    labelled_rows = [*zip(All_hypo_in, hypo_out), *zip(All_nonhypo_in, nonhypo_out)]
    np.random.shuffle(labelled_rows)

    shuffled_in = np.array([row for row, _ in labelled_rows])
    shuffled_out = np.array([label for _, label in labelled_rows])
    return shuffled_in, shuffled_out

In [None]:
import os
import csv
import numpy as np
from sklearn.metrics import roc_auc_score
from tensorflow.keras.models import load_model
from data_utils import load_config, load_and_prepare_data, shuffle_and_combine_data

def evaluate_model(sec, data, name, dataset, dtw_version, config):
    """Return the mean ROC-AUC over all models configured for ``sec``.

    The data for the given configuration is loaded, shuffled, and trimmed to
    the trailing 3000 columns for ``'30s'`` or 6000 columns for ``'60s'``
    (other values of ``sec`` are left untrimmed). A trailing axis of size 1 is
    then added. Every model path under ``config['model']['paths'][sec]`` is
    loaded and scored.

    Args:
        sec, data, name, dataset, dtw_version, config: Forwarded unchanged to
            ``load_and_prepare_data``; ``sec`` also selects the window length
            and the list of model paths.

    Returns:
        The mean over models of each model's mean ROC-AUC.
    """
    pos_in, neg_in, pos_out, neg_out = load_and_prepare_data(sec, data, name, dataset, dtw_version, config)
    shuffled_in, shuffled_out = shuffle_and_combine_data(pos_in, neg_in, pos_out, neg_out)

    # Keep only the trailing window that matches the requested input length.
    window = {'30s': 3000, '60s': 6000}.get(sec)
    if window is not None:
        shuffled_in = shuffled_in[:, -window:]

    shuffled_in = np.expand_dims(shuffled_in, axis=-1)

    per_model_auc = []
    for path in config['model']['paths'][sec]:
        loaded_model = load_model(path)
        # NOTE(review): the same model is scored 10 times on the same inputs;
        # unless inference is stochastic these repeats look identical — confirm
        # whether the repetition is intentional.
        repeat_scores = [
            roc_auc_score(shuffled_out, loaded_model.predict(shuffled_in))
            for _ in range(10)
        ]
        per_model_auc.append(np.mean(repeat_scores))

    return np.mean(per_model_auc)

def save_results_to_csv(results, output_path):
    """Write grouped AUC results to ``<output_path>/auc_results.csv``.

    Rows are sorted by DTW version and then by dataset. Each DTW section starts
    with a blank row and a ``DTW Version: ...`` row, and each dataset group
    within a section starts with a ``DataSet: ...`` row.

    Args:
        results: Iterable of 6-item rows
            ``[model, sec, data, dataset, dtw_version, auc]``.
        output_path: Directory that receives the CSV; created if missing.

    Side effects:
        Creates/overwrites the CSV file and prints its location.
    """
    os.makedirs(output_path, exist_ok=True)
    output_file = os.path.join(output_path, 'auc_results.csv')

    sorted_results = sorted(results, key=lambda row: (row[4], row[3]))

    with open(output_file, 'w', newline='') as file:
        writer = csv.writer(file)

        current_dtw = None
        current_dataset = None

        for model, sec, data, dataset, dtw, auc in sorted_results:
            if dtw != current_dtw:
                current_dtw = dtw
                # A new DTW section must always re-announce its dataset, even
                # when it is the same dataset the previous section ended with
                # (e.g. a single-dataset run); otherwise the header is lost.
                current_dataset = None
                writer.writerow([])
                writer.writerow([f'DTW Version: {dtw}'])

            if dataset != current_dataset:
                current_dataset = dataset
                writer.writerow([f'DataSet: {dataset}'])

            writer.writerow([model, sec, data, auc])

    print(f'Results saved to: {output_file}')

def main():
    """Evaluate every configured combination and write the AUCs to ./results/.

    Reads ``config.yaml`` and iterates over its ``sec``, ``data_type``,
    ``name_list``, ``dataset`` and ``dtw_version`` lists (in that nesting
    order). One ``[model, sec, data, dataset, dtw_version, avg_auc]`` row is
    collected per combination and handed to ``save_results_to_csv``.
    """
    config = load_config('config.yaml')

    results = []
    for sec in config['sec']:
        for data in config['data_type']:
            for model in config['name_list']:
                for dataset in config['dataset']:
                    for dtw_version in config['dtw_version']:
                        avg_auc = evaluate_model(sec, data, model, dataset, dtw_version, config)
                        results.append([model, sec, data, dataset, dtw_version, avg_auc])

    save_results_to_csv(results, './results/')

if __name__ == '__main__':
    main()

In [8]:
import numpy as np
import os
from tensorflow.keras.models import load_model
from sklearn import metrics
import yaml
import sys
sys.path.append('./utils/')
from model_utils_90s import create_model 
import csv

# with open('config_90s.yaml', 'r') as file:
#     config = yaml.safe_load(file)


In [9]:
# Evaluation grid for the 90 s setting; these globals are read by later cells.
sec = '90s'
data_types = ['HighQual','LowQual']
model_names = ['Baseline','DI','DI_D','DI_A','HIVAE','GPVAE','SNM','BDC']
datasets = ['VitalDB','MIMIC3']

# Weights file and input length for the classifier built by `create_model`.
model_path = './BestModels/90s_IOH.hdf5'
# NOTE(review): the two layer names are not referenced by any cell visible
# here — confirm whether they are still needed.
input_layer_name = 'Input'
output_layer_name = 'BinOut'
input_shape = 9000
# Build the network, then restore its weights; `predict` uses this global.
model = create_model(input_shape) 
model.load_weights(model_path) 

In [10]:
def suffle_combine_data(hypo_input, nonhypo_input, hypo_output, nonhypo_output):
    """Stack both classes, shuffle rows and labels together, and clip inputs.

    A single random index order (from ``np.random.shuffle``, i.e. NumPy's
    global random state) is applied to both inputs and labels, so each row
    keeps its label.

    Args:
        hypo_input: Input rows of the first class.
        nonhypo_input: Input rows of the second class.
        hypo_output: Labels for ``hypo_input``.
        nonhypo_output: Labels for ``nonhypo_input``.

    Returns:
        ``(abp_input, abp_output)``: shuffled inputs clipped to ``[0, 1]`` and
        the labels in the same order.
    """
    stacked_input = np.concatenate((hypo_input, nonhypo_input), axis=0)
    stacked_output = np.concatenate((hypo_output, nonhypo_output), axis=0)

    # One shared index order keeps rows and labels aligned.
    order = np.arange(len(stacked_input))
    np.random.shuffle(order)

    return np.clip(stacked_input[order], 0, 1), stacked_output[order]

In [11]:

def predict(abp_input):
    """Run the global ``model`` on the trailing 9000 columns of ``abp_input``.

    Args:
        abp_input: 2-D array-like indexed as ``[row, column]``.

    Returns:
        The result of ``model.predict`` called with ``batch_size=1000`` and
        ``verbose=1``.
    """
    trailing_window = abp_input[:, -9000:]
    return model.predict(trailing_window, batch_size=1000, verbose=1)


In [12]:

def evaluate(abp_output, pred):
    """Compute the ROC AUC and a confusion matrix for a set of predictions.

    Args:
        abp_output: True labels (label ``1`` is treated as positive).
        pred: Predicted scores; flattened and thresholded at ``> 0.5`` to
            obtain hard 0/1 labels for the confusion matrix.

    Returns:
        ``(auc, confusion_matrix)``.
    """
    fpr, tpr, _ = metrics.roc_curve(abp_output[:], pred, pos_label=1)
    hard_labels = np.where(np.reshape(pred, -1) > 0.5, 1, 0)
    return metrics.auc(fpr, tpr), metrics.confusion_matrix(abp_output, hard_labels)


In [15]:
# Header row; every row appended below follows the same column order.
results = [['Dataset','Input Length','Quality', 'Model', 'AUROC']]

# Sweep every dataset / quality / model combination with the loaded classifier.
# The loop variables stay defined after the loop and are read by later cells.
for dataset in datasets:
    for data_type in data_types:
        for model_name in model_names:
            # Rows from the *_Hypo file are labelled 1, rows from *_NonHypo 0.
            hypo_input = np.load(f'./ProcessedData/Evaluation/{sec}_IOH/{dataset}/{model_name}_{data_type}_Hypo.npy')
            hypo_output = np.ones(hypo_input.shape[0])
            nonhypo_input = np.load(f'./ProcessedData/Evaluation/{sec}_IOH/{dataset}/{model_name}_{data_type}_NonHypo.npy')
            nonhypo_output = np.zeros(nonhypo_input.shape[0])

            abp_input, abp_output = suffle_combine_data(hypo_input, nonhypo_input, hypo_output, nonhypo_output)
            
            pred = predict(abp_input)
            
            # The confusion matrix returned by `evaluate` is not used here.
            AUROC, _ = evaluate(abp_output, pred)
            
            results.append([dataset, sec, data_type, model_name, AUROC])
            



In [16]:
results

[['Dataset', 'Input Length', 'Quality', 'Model', 'AUROC'],
 ['VitalDB', '90s', 'HighQual', 'Baseline', 0.885092],
 ['VitalDB', '90s', 'HighQual', 'DI', 0.8922919999999999],
 ['VitalDB', '90s', 'HighQual', 'DI_D', 0.8906799999999999],
 ['VitalDB', '90s', 'HighQual', 'DI_A', 0.8890520000000001],
 ['VitalDB', '90s', 'HighQual', 'HIVAE', 0.87148],
 ['VitalDB', '90s', 'HighQual', 'GPVAE', 0.880716],
 ['VitalDB', '90s', 'HighQual', 'SNM', 0.877648],
 ['VitalDB', '90s', 'HighQual', 'BDC', 0.8867039999999999],
 ['VitalDB', '90s', 'LowQual', 'Baseline', 0.8200330578512396],
 ['VitalDB', '90s', 'LowQual', 'DI', 0.8546214876033058],
 ['VitalDB', '90s', 'LowQual', 'DI_D', 0.8589752066115703],
 ['VitalDB', '90s', 'LowQual', 'DI_A', 0.8603834710743802],
 ['VitalDB', '90s', 'LowQual', 'HIVAE', 0.8286396694214876],
 ['VitalDB', '90s', 'LowQual', 'GPVAE', 0.8192528925619834],
 ['VitalDB', '90s', 'LowQual', 'SNM', 0.8579371900826445],
 ['VitalDB', '90s', 'LowQual', 'BDC', 0.8210181818181819],
 ['MIMIC3'

In [3]:
# Select the first model / quality / dataset for a single manual run.
model_name = model_names[0]
data_type = data_types[0]
dataset = datasets[0]

In [4]:
# Evaluation directory for the selected input length and dataset.
# NOTE(review): not referenced by any other cell visible here — confirm it is still needed.
data_root = f'./ProcessedData/Evaluation/{sec}_IOH/{dataset}'

In [5]:
# File names of the two class files for the selected model and quality.
# NOTE(review): neither name is referenced by any other cell visible here.
hypo_pattern = f'{model_name}_{data_type}_Hypo.npy'
nonhypo_pattern = f'{model_name}_{data_type}_NonHypo.npy'

In [6]:
# Same values as the earlier configuration cell (duplicate scratch cell).
model_path = './BestModels/90s_IOH.hdf5'
input_layer_name = 'Input'
output_layer_name = 'BinOut'
input_shape = 9000

In [7]:
# Rebuild the classifier for `input_shape`; replaces the global `model`.
model = create_model(input_shape) 

In [8]:
# Restore the weights stored at `model_path` into the global `model`.
model.load_weights(model_path) 

In [18]:
# Directory that receives the AUROC summary of this run.
output_path = './Results/'
os.makedirs(output_path, exist_ok=True)

# `open()` needs a file path. The original passed the directory itself (which
# raises IsADirectoryError) and then called `save_results_to_csv`, which
# unpacks 6-field rows and treats its second argument as a directory — neither
# matches the 5-field AUROC rows collected in this notebook.
output_file = os.path.join(output_path, f'{sec}_IOH_AUROC_results.csv')

with open(output_file, 'w', newline='') as file:
    writer = csv.writer(file)
    # No separate header is written: `results`, as built by the evaluation
    # sweep, already starts with its own
    # ['Dataset', 'Input Length', 'Quality', 'Model', 'AUROC'] row.
    # NOTE(review): if `results` was reset to [] by a later scratch cell the
    # file will have no header — run the sweep cell first.
    writer.writerows(results)

In [11]:
# Start an empty result list for the manual single-case run below
# (this discards any rows, including the header, collected earlier).
results = []

In [12]:
# Manual single case: MIMIC3 / Baseline / HighQual, with hard-coded paths.
# Rows from the Hypo file are labelled 1, rows from the NonHypo file 0.
hypo_input = np.load('./ProcessedData/Evaluation/90s_IOH/MIMIC3/Baseline_HighQual_Hypo.npy')
hypo_output = np.ones(hypo_input.shape[0])
nonhypo_input = np.load('./ProcessedData/Evaluation/90s_IOH/MIMIC3/Baseline_HighQual_NonHypo.npy')
nonhypo_output = np.zeros(nonhypo_input.shape[0])

In [18]:
# Combine, shuffle and clip the two classes loaded above.
abp_input, abp_output = suffle_combine_data(hypo_input, nonhypo_input, hypo_output, nonhypo_output)

In [20]:
# Score the shuffled inputs with the global classifier.
pred = predict(abp_input)



In [25]:
# ROC AUC and confusion matrix (threshold 0.5) for the single case.
auc, confusion_matrix = evaluate(abp_output, pred)

In [27]:
# NOTE(review): column order here (sec, model, quality, dataset, auc) differs
# from the sweep cell's rows (dataset, sec, quality, model, auc) — confirm
# before mixing both kinds of rows in one file.
results.append([sec, model_name, data_type, dataset, auc])

In [28]:
# Display the collected rows.
results

[['90s', 'Baseline', 'HighQual', 'MIMIC3', 0.982384]]

In [17]:
# Inspect the current values of the selection globals.
data_type, model_name, dataset, sec

('HighQual', 'Baseline', 'MIMIC3', '90s')

In [None]:
def load_data(name, data_type, dataset, dtw_version):
    """Load the hypo / non-hypo inputs for one configuration, with labels.

    File locations come from ``generate_data_paths`` (defined outside this
    cell), which is also passed the module-level ``sec``.

    Scaling (applied only when ``name == 'uncleaned'``):
      * primary files: ``x * 180 / 200`` for ``dataset == 'vitaldb'``,
        ``(x - 20) / 200`` otherwise;
      * auxiliary noise files: ``(x - 20) / 200``.
    Each part is scaled exactly once, before concatenation. The previous
    version scaled vitaldb noise rows twice, because the primary scaling ran
    after the noise rows had already been appended.

    Labels are generated after all rows are assembled so that they always
    match the inputs one-to-one. The previous version created one label per
    auxiliary row even though only the index-selected rows were appended.

    Args:
        name: Model / variant name; ``'uncleaned'`` triggers rescaling.
        data_type: ``'Noise'`` appends index-selected rows from the auxiliary
            files.
        dataset: Dataset tag. NOTE(review): compared against lowercase
            ``'vitaldb'``, whereas other cells in this notebook use
            ``'VitalDB'`` — confirm the expected casing.
        dtw_version: Forwarded to ``generate_data_paths``.

    Returns:
        ``(hypo_input, nonhypo_input, hypo_output, nonhypo_output)`` where the
        outputs are all-ones / all-zeros vectors with one entry per input row.
    """
    (hypo_input_path, nonhypo_input_path,
     hypo_input_paths, nonhypo_input_paths,
     hypo_index_path, nonhypo_index_path) = generate_data_paths(name, data_type, sec, dataset, dtw_version)

    hypo_input = np.load(hypo_input_path)
    nonhypo_input = np.load(nonhypo_input_path)

    is_uncleaned = name == 'uncleaned'

    # Scale the primary rows first so later-appended rows are not rescaled.
    if is_uncleaned:
        if dataset == 'vitaldb':
            hypo_input = hypo_input * 180.0 / 200.0
            nonhypo_input = nonhypo_input * 180.0 / 200.0
        else:
            hypo_input = (hypo_input - 20) / 200
            nonhypo_input = (nonhypo_input - 20) / 200

    if data_type == 'Noise':
        add_hypo_input = np.concatenate([np.load(path) for path in hypo_input_paths], axis=0)
        add_nonhypo_input = np.concatenate([np.load(path) for path in nonhypo_input_paths], axis=0)

        idx_hypo = np.load(hypo_index_path)
        idx_nonhypo = np.load(nonhypo_index_path)

        if is_uncleaned:
            add_hypo_input = (add_hypo_input - 20) / 200
            add_nonhypo_input = (add_nonhypo_input - 20) / 200

        # Only the index-selected auxiliary rows are appended.
        hypo_input = np.concatenate((hypo_input, add_hypo_input[idx_hypo]), axis=0)
        nonhypo_input = np.concatenate((nonhypo_input, add_nonhypo_input[idx_nonhypo]), axis=0)

    # One label per final input row.
    hypo_output = np.ones(hypo_input.shape[0])
    nonhypo_output = np.zeros(nonhypo_input.shape[0])

    return hypo_input, nonhypo_input, hypo_output, nonhypo_output

In [9]:
# Scratch cell: clears the collected rows.
results = []

In [10]:
# NOTE(review): `hypo` is not defined in any cell visible here; this looks like
# a leftover partial name and would raise NameError on a fresh run.
hypo

In [21]:
# Inspect the shape of the prediction array.
pred.shape

(500, 1)

In [22]:
# Inspect the shape of the shuffled input array (before `predict` trims it).
abp_input.shape

(500, 12000)

In [23]:
# Inspect the shape of the shuffled label array.
abp_output.shape

(500,)