# **Installing Required Packages**

In [None]:
!pip install keras
! pip install tensorflow









# **Importing Necessary Libraries**

In [None]:
import os
import pickle
import sys
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
from scipy.io import loadmat
from scipy.signal import butter, lfilter
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow import keras
from keras_preprocessing.sequence import pad_sequences
from keras import backend

backend.set_image_data_format('channels_last')

# **Helper Functions to run the code**

### Function to load the ECG data from .hea and .mat files

In [None]:
def load_data(data_path): 
    """Load ECG signals (.mat) and header metadata (.hea) from a directory tree.

    Every ``.mat`` file contributes one signal array built from the first 12
    rows of its ``'val'`` entry, each padded/truncated at the end to 5000
    samples. Every ``.hea`` file contributes one entry to each metadata list.

    Files are visited in sorted order inside each directory so that the n-th
    signal and the n-th metadata entry come from the same record when the
    ``.mat`` and ``.hea`` files share a base name.

    Parameters
    ----------
    data_path : str
        Root directory that is walked recursively.

    Returns
    -------
    tuple
        ``(data, wave_length, diagnosis, pescription, history, surgery, sex, age)``
        where ``data`` is a numpy array and the remaining items are lists of
        strings taken verbatim from the header files.

    Raises
    ------
    IndexError
        If a header file has fewer than 19 lines or its first line has fewer
        than 4 whitespace-separated words.
    """
    data, sex, age, diagnosis, pescription, history, surgery, wave_length = ([] for _ in range(8))

    def header_value(lines, line_index, prefix_length):
        # Drop the field prefix and the line terminator only; slicing with
        # [:-1] would eat a real character when the last line of the file has
        # no trailing newline.
        return lines[line_index][prefix_length:].rstrip("\n")

    for subdirectory, directories, files in sorted(os.walk(data_path)): #iterating through all the data files
        # os.walk yields file names in arbitrary order; sorting keeps signals
        # and headers aligned record by record.
        for file_name in sorted(files):
            file_path = os.path.join(subdirectory, file_name)
            if file_name.endswith(".mat"):   #reading the matlab files
                signal = loadmat(file_path)['val']
                signal = pad_sequences(signal, maxlen=5000, truncating='post', padding="post") #keep the first 5000 datapoints
                data.append(np.asarray([np.asarray(signal[lead]) for lead in range(12)]))
            elif file_name.endswith(".hea"):   #reading the header files
                with open(file_path, 'r') as f:
                    header_lines = f.readlines()
                # 4th word of the first header line (read as the wave length)
                wave_length.append(header_lines[0].split()[3])
                # Fixed line positions / prefix widths; assumes headers shaped
                # like "# Age: 74", "# Sex: Male", "# Dx: ...", "# Rx: ...",
                # "# Hx: ...", "# Sx: ..." on lines 13-18 -- TODO confirm
                # against the dataset in use.
                age.append(header_value(header_lines, 13, 7))
                sex.append(header_value(header_lines, 14, 7))
                diagnosis.append(header_value(header_lines, 15, 6))
                pescription.append(header_value(header_lines, 16, 6))
                history.append(header_value(header_lines, 17, 6))
                # Previously this re-read line 16 (the prescription line).
                surgery.append(header_value(header_lines, 18, 6))
    data = np.asarray(data)
    return data, wave_length, diagnosis , pescription, history, surgery, sex, age

### Function to encode the demographic data (age, sex, etc.)

In [None]:
def demographical_data_encoding(sex, age, pescription=None, history=None, surgery=None):
  """Encode sex and age numerically and blank out unknown free-text fields.

  The original version read ``pescription``, ``history`` and ``surgery`` as
  globals while also assigning to them, which makes them local names and
  raises ``UnboundLocalError`` on every call. They are now optional
  parameters; when omitted, a notebook-level variable of the same name is
  used if one exists, otherwise an empty list.

  All input lists are updated in place (as before) and also returned as
  numpy arrays.

  Parameters
  ----------
  sex : list
      "Male"/"M" become 1, "Female"/"F" become 0, "NaN" becomes 2. Any other
      value is left unchanged.
  age : list
      Ages as strings. "NaN" entries are replaced by the most frequent known
      age (first seen wins on ties), then everything is converted to int.
  pescription, history, surgery : list, optional
      Entries equal to "Unknown" or " " are replaced by 0.

  Returns
  -------
  tuple of numpy.ndarray
      ``(sex, age, pescription, history, surgery)``

  Raises
  ------
  ValueError
      If ``age`` is non-empty and every entry is "NaN", or an entry cannot be
      converted to int.
  """
  def blank_unknown(values):
    # Replace the "no information" markers with 0, in place.
    for i, value in enumerate(values):
      if value == "Unknown" or value == " ":
        values[i] = 0
    return np.asarray(values)

  # Backward compatibility with the two-argument call: fall back to
  # notebook-level variables. Passing the lists explicitly is preferred.
  notebook_globals = globals()
  if pescription is None:
    pescription = notebook_globals.get("pescription", [])
  if history is None:
    history = notebook_globals.get("history", [])
  if surgery is None:
    surgery = notebook_globals.get("surgery", [])

  for i, value in enumerate(sex):
    if value in ("Male", "M"):        #assigns 1 to male
      sex[i] = 1
    elif value in ("Female", "F"):    #assigns 0 to female
      sex[i] = 0
    elif value == "NaN":              #assigns 2 where gender has NaN value
      sex[i] = 2
  sex = np.asarray(sex)

  # Impute from known ages only: if "NaN" were the most frequent value it
  # would be picked as the replacement and int("NaN") would fail.
  known_ages = [value for value in age if value != "NaN"]
  if len(age) > 0 and not known_ages:
    raise ValueError("cannot impute age: every entry is 'NaN'")
  rep = Counter(known_ages).most_common(1)[0][0] if known_ages else None
  for i, value in enumerate(age):
    age[i] = int(rep if value == "NaN" else value)
  age = np.asarray(age)

  pescription = blank_unknown(pescription)
  history = blank_unknown(history)
  surgery = blank_unknown(surgery)
  return sex, age , pescription, history, surgery

### ResNet-50 Model Construction

https://machinelearningknowledge.ai/keras-implementation-of-resnet-50-architecture-from-scratch/

In [None]:
def id_block(X,F1,F2,F3):
  """Residual block whose shortcut is the unmodified input.

  Applies three Conv1D + BatchNormalization stages (kernel sizes 1, 3, 1,
  all with stride 1), with a ReLU after the first two, adds the block input
  to the result and applies a final ReLU.

  Parameters
  ----------
  X : Keras tensor
      Block input. Presumably its channel count must equal ``F3`` for the
      addition to be valid -- confirm against the caller.
  F1, F2, F3 : int
      Number of filters of the first, second and third convolution.

  Returns
  -------
  Keras tensor
      Output of the final ReLU.
  """
  shortcut = X
  # (filters, kernel_size, padding, relu_after) for each stage of the main path
  stages = (
      (F1, 1, 'valid', True),
      (F2, 3, 'same', True),
      (F3, 1, 'valid', False),
  )
  for n_filters, width, pad, relu_after in stages:
    X = keras.layers.Conv1D(filters=n_filters, kernel_size=width, strides=1, padding=pad)(X)
    X = keras.layers.BatchNormalization()(X)
    if relu_after:
      X = keras.layers.Activation('relu')(X)

  # Residual connection: main path + untouched input
  merged = keras.layers.Add()([X, shortcut])
  return keras.layers.Activation('relu')(merged)

In [None]:
def conv_block(X,F1,F2,F3):
  """Residual block with a strided, projected shortcut.

  Main path: three Conv1D + BatchNormalization stages (kernel sizes 1, 3, 1;
  the first uses stride 2), with a ReLU after the first two. Shortcut path:
  a kernel-size-1, stride-2 Conv1D with ``F3`` filters followed by
  BatchNormalization. Both paths are added and passed through a ReLU.

  Parameters
  ----------
  X : Keras tensor
      Block input.
  F1, F2, F3 : int
      Number of filters of the first, second and third main-path convolution;
      ``F3`` is also used for the shortcut convolution.

  Returns
  -------
  Keras tensor
      Output of the final ReLU.
  """
  shortcut = X
  # (filters, kernel_size, strides, padding, relu_after) for the main path
  stages = (
      (F1, 1, 2, 'valid', True),
      (F2, 3, 1, 'same', True),
      (F3, 1, 1, 'valid', False),
  )
  for n_filters, width, step, pad, relu_after in stages:
    X = keras.layers.Conv1D(filters=n_filters, kernel_size=width, strides=step, padding=pad)(X)
    X = keras.layers.BatchNormalization()(X)
    if relu_after:
      X = keras.layers.Activation('relu')(X)

  # Shortcut path: same stride and filter count as the main path output
  shortcut = keras.layers.Conv1D(filters=F3, kernel_size=1, strides=2, padding='valid')(shortcut)
  shortcut = keras.layers.BatchNormalization()(shortcut)

  merged = keras.layers.Add()([X, shortcut])
  return keras.layers.Activation('relu')(merged)

In [None]:
def RESNET_50(a, num_classes=28):
    """Build and compile a 1-D ResNet-50-style multi-label classifier.

    The network is a stem (zero padding, Conv1D, BatchNormalization, ReLU,
    max pooling) followed by four stages, each made of one ``conv_block`` and
    2, 3, 5 and 2 ``id_block`` calls respectively, then average pooling,
    flattening and a dense output layer.

    The output layer uses a sigmoid activation. The model is compiled with
    binary cross-entropy and a thresholded binary accuracy, i.e. every output
    unit is treated as an independent yes/no label; the previous softmax
    forced the outputs to sum to 1, which contradicts that setup.

    Parameters
    ----------
    a : tuple
        Input shape passed to ``keras.layers.Input``.
    num_classes : int, optional
        Number of output units (default 28, the previously hard-coded value).

    Returns
    -------
    keras.models.Model
        The compiled model.
    """
    input_layer = keras.layers.Input(a)

    # Stem
    X = keras.layers.ZeroPadding1D(3)(input_layer)
    X = keras.layers.Conv1D(filters=64, kernel_size=7, strides=2)(X)
    X = keras.layers.BatchNormalization()(X)
    X = keras.layers.Activation('relu')(X)
    # NOTE(review): pool_size=1 with strides=2 only keeps every other time
    # step (no actual pooling window) -- confirm this is intended.
    X = keras.layers.MaxPooling1D(pool_size=1, strides=2)(X)

    # ((F1, F2, F3), number of identity blocks after the conv block)
    stage_plan = (
        ((64, 64, 256), 2),
        ((128, 128, 512), 3),
        ((256, 256, 1024), 5),
        ((512, 512, 2048), 2),
    )
    for filters, identity_count in stage_plan:
        X = conv_block(X, *filters)
        for _ in range(identity_count):
            X = id_block(X, *filters)

    X = keras.layers.AveragePooling1D(pool_size=2, padding='same')(X)
    X = keras.layers.Flatten()(X)
    # Sigmoid: independent per-label probabilities for multi-label targets.
    X = keras.layers.Dense(num_classes, activation='sigmoid')(X)

    model = keras.models.Model(inputs=input_layer, outputs=X)

    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        metrics=[
            tf.keras.metrics.BinaryAccuracy(name='accuracy', dtype=None, threshold=0.5),
            tf.keras.metrics.AUC(
                num_thresholds=200,
                curve="ROC",
                summation_method="interpolation",
                name="AUC"),
        ])

    return model

### Function to combine data with demographical features

In [None]:
def data_append_demographics(data,age,sex):
    """Append each record's sex and age to the end of its first 12 leads.

    ``data`` is modified in place: for record ``i``, ``sex[i]`` and then
    ``age[i]`` are appended to ``data[i][0]`` ... ``data[i][11]``. Each lead
    must therefore support ``.append`` (e.g. a Python list).

    Parameters
    ----------
    data : sequence
        Records, each indexable by lead number 0-11.
    age, sex : sequence
        Per-record values, indexed like ``data``.

    Returns
    -------
    The same ``data`` object that was passed in.
    """
    for record_index, record in enumerate(data):
        for lead_index in range(12):
            lead = record[lead_index]
            lead.append(sex[record_index])
            lead.append(age[record_index])
    return data

### Separating multi-label diagnoses and converting them to type int

In [None]:
def multi_labels_encoding(diagnosis):
    """Turn comma-separated diagnosis strings into lists of integer codes.

    Parameters
    ----------
    diagnosis : sequence of str
        One string per record, e.g. ``"12,34"``.

    Returns
    -------
    list of list of int
        One list of codes per record, in the original order.
    """
    return [
        [int(code) for code in record.split(',')]   #one code per comma-separated item
        for record in diagnosis
    ]

### Function to convert 2D data into a numpy array

In [None]:
def list_to_array(data):
    """Convert nested per-record lead data into a single numpy array.

    Only leads 0-11 of every record are used; each lead is converted with
    ``np.asarray`` before the whole structure is stacked.

    Parameters
    ----------
    data : sequence
        Records, each indexable by lead number 0-11.

    Returns
    -------
    numpy.ndarray
        Array built from the 12 leads of every record.
    """
    per_record = [
        [np.asarray(record[lead]) for lead in range(12)]
        for record in data
    ]
    return np.asarray(per_record)

### Function to remove unscored labels from dataset

In [None]:
def Remove_unscored_labels(diagnosis_labels,diseases):
    """Replace every label that appears in ``diseases`` with 0, in place.

    Parameters
    ----------
    diagnosis_labels : list of list
        Per-record label lists; modified in place.
    diseases : sequence
        Labels to blank out.

    Returns
    -------
    The same ``diagnosis_labels`` object that was passed in.
    """
    for record in diagnosis_labels:
        for position, label in enumerate(record):
            if any(label == unscored for unscored in diseases):
                record[position] = 0
    return diagnosis_labels

### Function to form multi class labels

In [None]:
def multi_class_labels(diagnosis_labels,scored_labels):    
    """Build one 0/1 indicator row per record over ``scored_labels``.

    Position ``c`` of a row is 1 when ``scored_labels[c]`` occurs among that
    record's labels, otherwise 0.

    Parameters
    ----------
    diagnosis_labels : sequence of sequence
        Labels of every record.
    scored_labels : sequence
        Labels that define the columns of the result.

    Returns
    -------
    numpy.ndarray
        The indicator rows stacked with ``np.asarray``.
    """
    rows = []
    for record in diagnosis_labels:
        indicator = [0] * len(scored_labels)
        for column, scored in enumerate(scored_labels):
            if any(label == scored for label in record):
                indicator[column] = 1
        rows.append(indicator)
    return np.asarray(rows)

### Function to check the frequency of labels in the data

In [None]:
def check_label_freq(final_labels,scored_labels):
    """Count, per column, how many rows of ``final_labels`` hold a 1.

    Parameters
    ----------
    final_labels : sequence of sequence
        Rows of indicator values.
    scored_labels : sequence
        Only its length is used; it sets the number of counters.

    Returns
    -------
    list of int
        One count per entry of ``scored_labels``.
    """
    counts = [0] * len(scored_labels)
    for row in final_labels:
        for column, flag in enumerate(row):
            if flag == 1:
                counts[column] = counts[column] + 1
    return counts

### Function to find unique labels in dataset

In [None]:
def unique_in_dataset(diagnosis_labels):
    """Collect the distinct labels of all records in first-seen order.

    Parameters
    ----------
    diagnosis_labels : iterable of iterable
        Labels of every record.

    Returns
    -------
    list
        Each distinct label once, ordered by first appearance.
    """
    seen = []
    every_label = (label for record in diagnosis_labels for label in record)
    for label in every_label:
        if label in seen:
            continue
        seen.append(label)
    return seen

### Function to calculate the label weights

In [None]:
def calculating_class_weights(y_true):
    """Compute 'balanced' class weights for every label column.

    For each column of ``y_true`` the weights of the classes 0 and 1 are
    obtained from scikit-learn's ``compute_class_weight``. ``classes`` and
    ``y`` are passed by keyword because current scikit-learn releases accept
    them only as keyword arguments.

    Parameters
    ----------
    y_true : array-like of shape (n_samples, n_labels)
        Binary indicator matrix; nested lists are accepted.

    Returns
    -------
    numpy.ndarray of shape (n_labels, 2)
        Row ``i`` holds the weights of class 0 and class 1 for label ``i``.

    Notes
    -----
    scikit-learn is expected to raise ``ValueError`` for a column that does
    not contain both 0 and 1 -- verify against the installed version.
    """
    y_true = np.asarray(y_true)
    number_dim = np.shape(y_true)[1]
    classes = np.array([0., 1.])
    weights = np.empty([number_dim, 2])
    for i in range(number_dim):
        weights[i] = compute_class_weight(class_weight='balanced', classes=classes, y=y_true[:, i])
    return weights

### Function to Filter Noise From the data

https://stackoverflow.com/questions/25191620/creating-lowpass-filter-in-scipy-understanding-methods-and-units

In [None]:
def butter_lowpass(cutoff,fs,order=5):
  """Design a digital low-pass Butterworth filter.

  Parameters
  ----------
  cutoff : float
      Cutoff frequency, in the same units as ``fs``.
  fs : float
      Sampling frequency.
  order : int, optional
      Filter order (default 5).

  Returns
  -------
  tuple
      The ``(b, a)`` coefficient pair returned by ``butter``.
  """
  nyquist = 0.5 * fs
  # butter expects the cutoff as a fraction of the Nyquist frequency
  numerator, denominator = butter(order, cutoff / nyquist, btype='low', analog=False)
  return numerator, denominator
def butter_lowpass_filter(data,cutoff,fs,order=5):
  """Low-pass filter ``data`` with a Butterworth filter.

  The coefficients come from ``butter_lowpass(cutoff, fs, order=order)`` and
  are applied with ``lfilter``.

  Parameters
  ----------
  data : array-like
      Signal to filter.
  cutoff : float
      Cutoff frequency, in the same units as ``fs``.
  fs : float
      Sampling frequency.
  order : int, optional
      Filter order (default 5).

  Returns
  -------
  The filtered signal as returned by ``lfilter``.
  """
  numerator, denominator = butter_lowpass(cutoff, fs, order=order)
  return lfilter(numerator, denominator, data)