In [None]:
import os
import random
import wandb

import utility_new as pc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.sequence import pad_sequences

from keras.utils import plot_model
from keras import layers
from keras.layers import Input, Dense, Dropout, Activation, BatchNormalization, Add
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPool1D, ZeroPadding1D, LSTM, Bidirectional
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.layers import Concatenate

from sklearn.metrics import confusion_matrix

import scipy
from scipy import optimize
from scipy.io import loadmat
from scipy.signal import butter, filtfilt

import ecg_plot
import heartpy as hp
import pywt
import utility_new as pc

from itables import init_notebook_mode

init_notebook_mode(all_interactive=True)

%load_ext autoreload
%autoreload
%reload_ext autoreload

In [None]:
# Diagnosis codes recognised across the datasets (presumably SNOMED-CT
# identifiers -- TODO confirm against the dataset headers).
all_available_classes = [
    '270492004', '164889003', '164890007', '426627000', '713427006',
    '713426002', '445118002', '39732003', '164909002', '251146004',
    '698252002', '10370003', '284470004', '427172004', '164947007',
    '111975006', '164917005', '47665007', '59118001', '427393009',
    '426177001', '426783006', '427084000', '63593006', '164934002',
    '59931005', '17338001',
]

# Subset of the codes above that is treated as the positive class.
positive_classes = ['164889003', '164890007']

# Root folders of the three ECG archives.
# NOTE(review): path_C carries a drive prefix while path_G / path_P do not --
# confirm all three resolve on the machine running this notebook.
path_C = "H:/CORAI_code_data/G_drive/Study - ECG Atrial Fibrillation Prediction With Open Access Data/archive/CPSC_Extra/"
path_G = "/Study - ECG Atrial Fibrillation Prediction With Open Access Data/archive/G12ECG/WFDB/"
path_P = "/Study - ECG Atrial Fibrillation Prediction With Open Access Data/archive/PTB_XL/WFDB/"

dataset_paths = [path_C, path_G, path_P]

In [None]:
# Load the pre-computed feature table: ECG file name, encoded label,
# demographics and heart-rate-variability (HRV) metrics.
calculated_metrics_df = pd.read_csv('/Study - ECG Atrial Fibrillation Prediction With Open Access Data/trained_models/Final_frame_to_work_withFilterSignalLengthCheck_new.csv')

# The CSV stores file names under the folder the data was originally saved in;
# re-point them to where the data lives now.
old_path = 'C:/Users/Admin/Downloads'
#new_path = 'G:/Study - ECG Atrial Fibrillation Prediction With Open Access Data'
new_path = '/Study - ECG Atrial Fibrillation Prediction With Open Access Data'

# regex=False: this is a literal prefix swap. Being explicit keeps the result
# independent of the pandas version (the default for `regex` changed in 2.0).
calculated_metrics_df['Filename'] = calculated_metrics_df['Filename'].str.replace(old_path, new_path, regex=False)

# Some HRV features are NaN; drop those rows before training.
# Note: dropna() keeps the original index labels, so the index has gaps afterwards.
calculated_metrics_df = calculated_metrics_df.dropna()

# Column views used by the training cells below.
data_ecg_filenames = calculated_metrics_df['Filename']
encoded_labels = calculated_metrics_df['Encoded_labels']
age = calculated_metrics_df['Age']
gender = calculated_metrics_df['Gender']
HeartRate = calculated_metrics_df['HeartRate']
InterBeatInterval = calculated_metrics_df['InterBeatInterval']
HRV_SDNN = calculated_metrics_df['HRV_SDNN']
HRV_RMSSD = calculated_metrics_df['HRV_RMSSD']
PNN20 = calculated_metrics_df['PNN20']
PNN50 = calculated_metrics_df['PNN50']
HR_MAD = calculated_metrics_df['HR_MAD']
Ratio_of_SD1_SD2 = calculated_metrics_df['Ratio_of_SD1_SD2']
InfoS = calculated_metrics_df['InfoS']

In [None]:
class ECGDataGenerator_all_demo_M(keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``([ecg, features], one_hot_labels)`` batches.

    For every file name the generator pairs the ECG signal loaded from the
    ``.mat`` file with eight per-recording scalars, in this order:
    age, gender, HeartRate, InterBeatInterval, HRV_SDNN, HRV_RMSSD, HR_MAD,
    Ratio_of_SD1_SD2.

    Parameters
    ----------
    filenames : sequence of str
        Paths of the ``.mat`` recordings. Must support slicing and iteration.
    labels : sequence of int, optional
        Class index (0 or 1) per recording. When ``None`` the generator yields
        ``None`` as the target (e.g. for prediction).
    age_data, gender_data, HeartRate, InterBeatInterval, HRV_SDNN, HRV_RMSSD,
    HR_MAD, Ratio_of_SD1_SD2 : sequence
        One value per entry of ``filenames``. All eight are required even
        though they default to ``None`` in the signature.
    batch_size : int
        Number of recordings per batch.
    is_training : bool
        Stored on the instance; not used by this class itself.

    Raises
    ------
    TypeError
        If any of the eight feature sequences is ``None``.
    """

    def __init__(self, filenames, labels=None, age_data=None, gender_data=None, HeartRate=None,InterBeatInterval=None,HRV_SDNN=None,HRV_RMSSD=None,HR_MAD=None,Ratio_of_SD1_SD2=None,batch_size=32, is_training=True):
        # Let the Keras base class initialise its own bookkeeping.
        super().__init__()

        self.filenames = filenames
        self.labels = labels
        self.age_data = age_data
        self.gender_data = gender_data
        self.HeartRate = HeartRate
        self.InterBeatInterval = InterBeatInterval
        self.HRV_SDNN = HRV_SDNN
        self.HRV_RMSSD = HRV_RMSSD
        self.HR_MAD = HR_MAD
        self.Ratio_of_SD1_SD2 = Ratio_of_SD1_SD2
        self.batch_size = batch_size
        self.is_training = is_training

        feature_columns = (age_data, gender_data, HeartRate, InterBeatInterval,
                           HRV_SDNN, HRV_RMSSD, HR_MAD, Ratio_of_SD1_SD2)
        if any(column is None for column in feature_columns):
            # Fail with a readable message instead of an opaque zip() error.
            raise TypeError(
                "ECGDataGenerator_all_demo_M requires age_data, gender_data, HeartRate, "
                "InterBeatInterval, HRV_SDNN, HRV_RMSSD, HR_MAD and Ratio_of_SD1_SD2"
            )

        # Map each file name to its 8-tuple of features. If a file name occurs
        # more than once, the last occurrence wins.
        self.filename_to_data = dict(zip(filenames, zip(*feature_columns)))

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return int(np.ceil(len(self.filenames) / self.batch_size))

    def __getitem__(self, idx):
        """Build batch ``idx``.

        Returns ``([ecg_batch, feature_batch], targets)`` where ``targets`` is
        a one-hot array with two classes, or ``None`` when the generator was
        created without labels.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_filenames = self.filenames[start:stop]

        batch_demo_data = np.array([self.filename_to_data[filename] for filename in batch_filenames])

        # Without labels there is nothing to slice; feed placeholders so that
        # load_data keeps its (filename, demo_data, label) call shape.
        if self.labels is None:
            batch_labels = [None] * len(batch_filenames)
        else:
            batch_labels = self.labels[start:stop]

        batch_data = [
            self.load_data(filename, demo_data, label)
            for filename, demo_data, label in zip(batch_filenames, batch_demo_data, batch_labels)
        ]
        inputs, demo_data, targets = zip(*batch_data)
        model_inputs = [np.array(inputs), np.array(demo_data)]

        if self.labels is None:
            return model_inputs, None

        # Integer class indices -> one-hot rows for the 2-unit softmax output.
        one_hot_targets = keras.utils.to_categorical(targets, num_classes=2)
        return model_inputs, np.array(one_hot_targets)

    def load_data(self, filename, demo_data, label):
        """Load one recording and return ``(ecg_data, demo_data, label)``.

        The ``'val'`` matrix of the MAT file is padded/truncated at the end to
        5000 samples per row and then transposed, i.e. the result has shape
        ``(5000, n_rows_of_val)``. ``demo_data`` and ``label`` are passed
        through unchanged.
        """
        recording = loadmat(filename)
        signal = np.asarray(recording['val'], dtype=np.float64)
        # Each row of `signal` is treated as one sequence by pad_sequences.
        # NOTE(review): pad_sequences defaults to dtype='int32', so the float64
        # signal is cast to integers here -- confirm that this is intended.
        ecg_data = pad_sequences(signal, maxlen=5000, truncating='post', padding="post")
        # (rows, 5000) -> (5000, rows); the model input assumes 12 rows (leads).
        ecg_data = np.transpose(ecg_data)

        return ecg_data, demo_data, label

In [None]:
def _vgg_conv_stage(x, filters, n_convs):
    """Apply ``n_convs`` x (Conv1D k=3 'same' -> BatchNorm -> ReLU), then a stride-2 max-pool."""
    for _ in range(n_convs):
        x = keras.layers.Conv1D(filters=filters, kernel_size=3, padding='same')(x)
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.Activation('relu')(x)
    return keras.layers.MaxPool1D(pool_size=2, strides=2, padding='same')(x)


def vgg_16_model_demo_HRV_M():
    """Build and compile the two-input VGG16-style 1D CNN.

    Inputs
    ------
    * ECG signal of shape ``(5000, 12)``.
    * Feature vector of shape ``(8,)`` (demographic + HRV scalars).

    Architecture
    ------------
    * ECG branch: five conv stages (64x2, 128x2, 256x3, 512x3, 512x3 filters x
      convolutions), each closed by a stride-2 max-pool, followed by global
      average pooling.
    * Feature branch: Dense(13, relu) -> Dense(2, softmax).
    * Head: concatenate both branches -> Dense(32, relu) -> Dropout(0.3) ->
      Dense(16, relu) -> Dropout(0.4) -> Dense(2, softmax).

    Returns
    -------
    keras.Model
        Compiled with binary cross-entropy, Adam (lr=1e-4) and the metrics
        ``accuracy``, ``Recall``, ``Precision`` and ``AUC``. The metric names
        determine the keys (e.g. ``val_AUC``) that callbacks can monitor.
    """
    inputlayer = keras.layers.Input(shape=(5000, 12))
    inputB = keras.layers.Input(shape=(8,))

    # --- ECG branch -------------------------------------------------------
    features = inputlayer
    for filters, n_convs in ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3)):
        features = _vgg_conv_stage(features, filters, n_convs)
    gap_layer = keras.layers.GlobalAveragePooling1D()(features)

    # --- Feature (demographic + HRV) branch -------------------------------
    mod3 = keras.layers.Dense(13, activation="relu")(inputB)
    mod3 = keras.layers.Dense(2, activation="softmax")(mod3)

    # --- Joint classification head ----------------------------------------
    combined = keras.layers.concatenate([gap_layer, mod3])
    additional_layer = keras.layers.Dense(32, activation='relu')(combined)
    additional_layer = keras.layers.Dropout(rate=0.3)(additional_layer)
    # Stack on the previous layer (not on `combined`) so the Dense(32) block
    # is actually part of the network.
    additional_layer = keras.layers.Dense(16, activation='relu')(additional_layer)
    additional_layer = keras.layers.Dropout(rate=0.4)(additional_layer)
    final_layer = keras.layers.Dense(2, activation="softmax")(additional_layer)

    model = keras.models.Model(inputs=[inputlayer, inputB], outputs=final_layer)

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy', dtype=None, threshold=0.5),
            tf.keras.metrics.Recall(name='Recall'),
            tf.keras.metrics.Precision(name='Precision'),
            tf.keras.metrics.AUC(
                num_thresholds=200,
                curve="ROC",
                summation_method="interpolation",
                name="AUC",
                dtype=None,
                thresholds=None,
                label_weights=None,
            ),
        ],
    )

    return model

In [None]:
from sklearn.utils import class_weight
from keras.models import load_model
import pickle

# Multiply the learning rate by 0.1 as soon as val_AUC has not improved by at
# least 1e-4 for one epoch.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_AUC', factor=0.1, patience=1, verbose=1, mode='max',
    min_delta=0.0001, cooldown=0, min_lr=0
)
# Stop training once val_loss has not improved for 5 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# Output folder for models and histories ('' = current working directory).
base_dir_save=''
# NOTE: the list is empty, so the loop below does not run until seeds are added.
random_seed_list=[]

# Train the plain VGG16 model once per seed, with and without class weights.
for random_seed in random_seed_list:
    (train_filenames, val_filenames, test_filenames,
     train_labels, val_labels, test_labels,
     train_age, val_age, test_age,
     train_gender, val_gender, test_gender) = pc.split_and_shuffle_data(
        data_ecg_filenames, encoded_labels, age, gender, random_state=random_seed, validation=True)

    ##class-weights
    # Stored under a name of its own so the imported `class_weight` module is
    # not overwritten.
    label_values = np.unique(train_labels)
    class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=label_values, y=train_labels)
    class_weight_dict = dict(zip(label_values, class_weights))

    ##Model training
    simplified_model_cw=pc.vgg_16_model_M()
    simplified_model=pc.vgg_16_model_M()
    batch_size = 128
    train_generator = pc.ECGDataGenerator_M(train_filenames, train_labels,batch_size)
    val_generator = pc.ECGDataGenerator_M(val_filenames, val_labels,batch_size)
    history_cw = simplified_model_cw.fit(
        train_generator, epochs=100, steps_per_epoch=len(train_generator),
        validation_data=val_generator, validation_steps=len(val_generator), validation_freq=1,
        class_weight=class_weight_dict, callbacks=[reduce_lr, early_stop])
    history = simplified_model.fit(
        train_generator, epochs=100, steps_per_epoch=len(train_generator),
        validation_data=val_generator, validation_steps=len(val_generator), validation_freq=1,
        callbacks=[reduce_lr, early_stop])

    ##model save
    file_name_cw=os.path.join(base_dir_save,'VGG16_model_WCW{}.h5'.format(random_seed))
    simplified_model_cw.save(file_name_cw)
    file_name=os.path.join(base_dir_save,'VGG16_model_WoCW{}.h5'.format(random_seed))
    simplified_model.save(file_name)

    ##history save
    # NOTE(review): these files are pickles despite the '.h5' suffix; the names
    # are kept so existing readers of the files keep working.
    history_file_cw = os.path.join(base_dir_save,'VGG16_model_WCW_history_{}.h5'.format(random_seed))
    with open(history_file_cw, 'wb') as f:
        pickle.dump(history_cw.history, f)

    history_file= os.path.join(base_dir_save,'VGG16_model_WoCW_history_{}.h5'.format(random_seed))
    with open(history_file, 'wb') as f:
        pickle.dump(history.history, f)

In [None]:
from sklearn.utils import class_weight
from keras.models import load_model
import pickle

# Multiply the learning rate by 0.1 as soon as val_AUC has not improved by at
# least 1e-4 for one epoch.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_AUC', factor=0.1, patience=1, verbose=1, mode='max',
    min_delta=0.0001, cooldown=0, min_lr=0
)
# Stop training once val_loss has not improved for 5 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# Output folder for models and histories ('' = current working directory).
base_dir_save=''
# NOTE: the list is empty, so the loop below does not run until seeds are added.
random_seed_list=[]

# Train the AlexNet + HRV-features model once per seed, with class weights.
for random_seed in random_seed_list:
    (train_filenames, val_filenames, test_filenames,
     train_labels, val_labels, test_labels,
     train_age, val_age, test_age,
     train_gender, val_gender, test_gender,
     train_HeartRate, val_HeartRate, test_HeartRate,
     train_InterBeatInterval, val_InterBeatInterval, test_InterBeatInterval,
     train_HRV_SDNN, val_HRV_SDNN, test_HRV_SDNN,
     train_HRV_RMSSD, val_HRV_RMSSD, test_HRV_RMSSD,
     train_HR_MAD, val_HR_MAD, test_HR_MAD,
     train_Ratio_of_SD1_SD2, val_Ratio_of_SD1_SD2, test_Ratio_of_SD1_SD2) = pc.split_and_shuffle_data_all(
        data_ecg_filenames, encoded_labels, age, gender,HeartRate,InterBeatInterval, HRV_SDNN,HRV_RMSSD,HR_MAD,Ratio_of_SD1_SD2,random_state=random_seed, validation=True)

    ##class-weights
    # Stored under a name of its own so the imported `class_weight` module is
    # not overwritten.
    label_values = np.unique(train_labels)
    class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=label_values, y=train_labels)
    class_weight_dict = dict(zip(label_values, class_weights))

    ##Model training
    demo_model =pc.alexNet_model_HRV_M()
    batch_size = 128

    train_generator = pc.ECGDataGenerator_all_HRV_M(train_filenames, train_labels, train_HeartRate,train_InterBeatInterval,train_HRV_SDNN,train_HRV_RMSSD,train_HR_MAD,train_Ratio_of_SD1_SD2, batch_size=batch_size)
    val_generator = pc.ECGDataGenerator_all_HRV_M(val_filenames, val_labels,val_HeartRate,val_InterBeatInterval,val_HRV_SDNN,val_HRV_RMSSD,val_HR_MAD,val_Ratio_of_SD1_SD2,batch_size=batch_size)
    history_demo = demo_model.fit(
        train_generator, epochs=100, steps_per_epoch=len(train_generator),
        validation_data=val_generator, validation_steps=len(val_generator), validation_freq=1,
        class_weight=class_weight_dict, callbacks=[reduce_lr, early_stop])

    ##model save
    file_name_demo=os.path.join(base_dir_save,'alexNet_HRV_WCW_model_{}.h5'.format(random_seed))
    demo_model.save(file_name_demo)

    ##history save
    # NOTE(review): this file is a pickle despite the '.h5' suffix; the name is
    # kept so existing readers of the file keep working.
    history_file_demo = os.path.join(base_dir_save,'alexNet_HRV_WCW_model_history_{}.h5'.format(random_seed))
    with open(history_file_demo, 'wb') as f:
        pickle.dump(history_demo.history, f)


In [None]:
from sklearn.utils import class_weight
import pickle

# Multiply the learning rate by 0.1 as soon as val_AUC has not improved by at
# least 1e-4 for one epoch.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_AUC', factor=0.1, patience=1, verbose=1, mode='max',
    min_delta=0.0001, cooldown=0, min_lr=0
)
# Stop training once val_loss has not improved for 5 epochs.
early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)

# Output folder for models and histories ('' = current working directory).
base_dir_save=''
# NOTE: the list is empty, so the loop below does not run until seeds are added.
random_seed_list=[]

# Train the AlexNet + demographics + HRV model once per seed, with class weights.
for random_seed in random_seed_list:
    (train_filenames, val_filenames, test_filenames,
     train_labels, val_labels, test_labels,
     train_age, val_age, test_age,
     train_gender, val_gender, test_gender,
     train_HeartRate, val_HeartRate, test_HeartRate,
     train_InterBeatInterval, val_InterBeatInterval, test_InterBeatInterval,
     train_HRV_SDNN, val_HRV_SDNN, test_HRV_SDNN,
     train_HRV_RMSSD, val_HRV_RMSSD, test_HRV_RMSSD,
     train_HR_MAD, val_HR_MAD, test_HR_MAD,
     train_Ratio_of_SD1_SD2, val_Ratio_of_SD1_SD2, test_Ratio_of_SD1_SD2) = pc.split_and_shuffle_data_all(
        data_ecg_filenames, encoded_labels, age, gender,HeartRate,InterBeatInterval, HRV_SDNN,HRV_RMSSD,HR_MAD,Ratio_of_SD1_SD2,random_state=random_seed, validation=True)

    ##class-weights
    # Stored under a name of its own so the imported `class_weight` module is
    # not overwritten.
    label_values = np.unique(train_labels)
    class_weights = class_weight.compute_class_weight(class_weight='balanced', classes=label_values, y=train_labels)
    class_weight_dict = dict(zip(label_values, class_weights))

    ##Model training
    demo_model =pc.alexNet_model_demo_HRV_M()
    batch_size = 128

    train_generator = pc.ECGDataGenerator_all_demo_M(train_filenames, train_labels, train_age, train_gender,train_HeartRate,train_InterBeatInterval,train_HRV_SDNN,train_HRV_RMSSD,train_HR_MAD,train_Ratio_of_SD1_SD2, batch_size=batch_size)
    val_generator = pc.ECGDataGenerator_all_demo_M(val_filenames, val_labels, val_age, val_gender,val_HeartRate,val_InterBeatInterval,val_HRV_SDNN,val_HRV_RMSSD,val_HR_MAD,val_Ratio_of_SD1_SD2,batch_size=batch_size)
    history_demo = demo_model.fit(
        train_generator, epochs=100, steps_per_epoch=len(train_generator),
        validation_data=val_generator, validation_steps=len(val_generator), validation_freq=1,
        class_weight=class_weight_dict, callbacks=[reduce_lr, early_stop])

    ##model save
    file_name_demo=os.path.join(base_dir_save,'alexNet_demo_HRV_WCW_model_{}.h5'.format(random_seed))
    demo_model.save(file_name_demo)

    ##history save
    # NOTE(review): this file is a pickle despite the '.h5' suffix; the name is
    # kept so existing readers of the file keep working.
    history_file_demo = os.path.join(base_dir_save,'alexNet_demo_HRV_WCW_model_history_{}.h5'.format(random_seed))
    with open(history_file_demo, 'wb') as f:
        pickle.dump(history_demo.history, f)