In [1]:
import wfdb
import numpy as np
import random
import pickle
import matplotlib.pyplot as plt
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import ast
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import tensorflow as tf
from tqdm import tqdm
import seaborn as sns
import keras
from keras.models import Sequential
from keras.callbacks import History 
from keras.layers import Reshape
from keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout,MaxPooling1D,GlobalAveragePooling1D
from keras.regularizers import l2
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras import optimizers
from tensorflow.keras.layers import LeakyReLU
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import confusion_matrix

# # # More imports
from keras.preprocessing.sequence import pad_sequences
from keras import layers
from keras.layers import Input, Dense, Dropout, Activation, BatchNormalization, Add
from keras.layers import Conv1D, GlobalAveragePooling1D, MaxPool1D, ZeroPadding1D, LSTM, Bidirectional
from keras.models import Sequential, Model
from keras.layers.merge import concatenate

##  Loading data

In [9]:
# Load the preprocessed Y data
Y = pd.read_csv('Datasets/Ydata_all.csv')
Y.scp_codes = Y.scp_codes.apply(lambda x: ast.literal_eval(x))


  exec(code_obj, self.user_global_ns, self.user_ns)


In [10]:
# Loading the X data
X=np.loadtxt('Datasets/Xdata_all.csv')
print(X.shape)
X=X.reshape(len(Y), 200, 12)
print(X.shape)

(200118, 2400)
(200118, 200, 12)


### Converting SCP codes from keys to classes

In [11]:
# Load the classifications of arrythmias
# You may need to create a folder called Data and place all the data within it
# Alternatively, change the path
path = 'Data/ptb-xl-a-large-publicly-available-electrocardiography-dataset-1.0.1/'

classifications=pd.read_csv(path+'scp_statements.csv', index_col=0)
diagnostics=classifications[classifications.diagnostic==1]
forms=classifications[classifications.form==1]
rhythms=classifications[classifications.rhythm==1]
rhythms.index

Index(['SR', 'AFIB', 'STACH', 'SARRH', 'SBRAD', 'PACE', 'SVARR', 'BIGU',
       'AFLT', 'SVTAC', 'PSVT', 'TRIGU'],
      dtype='object')

In [12]:
# Random plotting function
random.seed(1)
def plot(X, Y, i, rand, info): 
    if rand==True:
        int=random.randrange(len(X))
    else:
        int=i
    if info==True:
        print(Y.iloc[int])
    print(int)
    plt.plot(X[int,:300, 11])
    plt.xlabel('samples')
    plt.ylabel('mV/Lead 1')
    plt.show()

In [13]:
# plot(X, Y, 2, rand=True, info=True)


In [14]:
# Extract the superclasses from the scp statements and apply to the database
def aggregate_supclass_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in diagnostics.index:
#             tmp.append(diagnostics.loc[key].diagnostic_class)
#             Only take probabilities of 100%!
            if y_dic.get(key)==100:
                tmp.append(diagnostics.loc[key].diagnostic_class)
    return list(set(tmp))
    
# Apply diagnostic superclass
Y['diagnostic_superclass'] = Y.scp_codes.apply(aggregate_supclass_diagnostic)
Y['diagnostic_superclass_len'] = Y['diagnostic_superclass'].apply(len)
# Y_reduced=Y[Y.diagnostic_superclass_len>0]
# X_reduced=X[np.where(Y.diagnostic_superclass_len>0)]            
# # multi=Y_reduced.loc[Y.diagnostic_superclass_len>0, 'diagnostic_superclass']
# multi=Y_reduced['diagnostic_superclass']

# # Hash the following line for multiclass ECGs
# # This line reduces for example, [NORM, STTC] to [NORM] by taking the first element 
# Y_reduced['diagnostic_superclass'] = [x[0] for x in multi]

In [15]:
print(len(Y))
# print(len(Y_reduced))
# print(len(Y_reduced[Y_reduced.diagnostic_subclass_len==0]))

200118


In [16]:
# Extract the subclasses from the scp statements and apply to the database
def aggregate_subclass_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in diagnostics.index:
            if y_dic.get(key)==100:
                tmp.append(diagnostics.loc[key].diagnostic_subclass)
    ret = list(set(tmp))
    ret = ['sub_'+r for r in ret] # to distinguish between subclass and superclass columns
    return ret

# Apply diagnostic subclass
Y['diagnostic_subclass'] = Y.scp_codes.apply(aggregate_subclass_diagnostic)
Y['diagnostic_subclass_len'] = Y['diagnostic_subclass'].apply(len)

# Y_reduced=Y_reduced[Y_reduced.diagnostic_subclass_len>0]
# X_reduced=X_reduced[np.where(Y_reduced.diagnostic_subclass_len>0)]  

# # multi=Y_reduced.loc[Y_reduced.diagnostic_subclass_len==1, 'diagnostic_subclass']
# multi=Y_reduced['diagnostic_subclass']

# print(len(multi), len(Y_reduced))


# # Hash the following line for multiclass ECGs
# Y_reduced['diagnostic_subclass'] = [x[0] for x in multi]


In [17]:
def aggregate_rhythms_diagnostic(y_dic):
    tmp = []
    for key in y_dic.keys():
        if key in rhythms.index:
                tmp.append(key)
    ret = list(set(tmp))
    return tmp

# Apply rhythms
Y['rhythms'] = Y.scp_codes.apply(aggregate_rhythms_diagnostic)
Y['rhythms_len'] = Y['rhythms'].apply(len)

In [18]:
# Only take data where Super/subclasses are labelled
Y_labelled=Y[Y.diagnostic_superclass_len>0]
X_labelled=X[np.where(Y.diagnostic_superclass_len>0)] 

Y_labelled=Y_labelled[Y_labelled.diagnostic_subclass_len>0]
X_labelled=X_labelled[np.where(Y_labelled.diagnostic_subclass_len>0)] 

In [19]:
# Only take data validated by humans (we dont trust robots!)
Y_labelled=Y_labelled[Y_labelled.validated_by_human==True]
X_labelled=X_labelled[np.where(Y_labelled.validated_by_human==True)] 

In [20]:
# Filter data for which only 1 subclass is present for each ECG
Y_single_class=Y_labelled[Y_labelled.diagnostic_superclass_len==1]
X_single_class=X_labelled[np.where(Y_labelled.diagnostic_superclass_len==1)]
print(len(Y_single_class))

98092


In [21]:
Y_single_class=Y_single_class[Y_single_class.diagnostic_subclass_len==1]
X_single_class=X_single_class[np.where(Y_single_class.diagnostic_subclass_len==1)]
print(len(Y_single_class))


92396


In [22]:
# all_superclass = pd.Series(np.hstack(Y_single_class['diagnostic_superclass'].values))
# all_subclass = pd.Series(np.hstack(Y_single_class['diagnostic_subclass'].values))
# print(len(all_subclass))

# superclass_cols = all_superclass.unique()
# subclass_cols = all_subclass.unique()
# update_cols = np.concatenate([superclass_cols, subclass_cols]) # add meta data columns
# meta_cols = ['age', 'sex', 'height', 'weight', 'nurse', 'site', 'device',] # could add more columns as features

### OHE the classes

In [23]:
all_superclass = pd.Series(np.concatenate(Y_labelled['diagnostic_superclass'].values))
all_subclass = pd.Series(np.concatenate(Y_labelled['diagnostic_subclass'].values))
all_rhythms = pd.Series(np.concatenate(Y_labelled['rhythms'].values))
superclass_cols = all_superclass.unique()
subclass_cols = all_subclass.unique()
rhythms_cols=all_rhythms.unique()
update_cols = np.concatenate([superclass_cols, subclass_cols, rhythms_cols]) # add meta data columns
meta_cols = ['age', 'sex', 'height', 'weight', 'nurse', 'site', 'device',] # could add more columns as features

In [24]:
class ClassUpdate():
    def __init__(self, cols):
        self.cols = cols

    def __call__(self, row):
        for sc in row['diagnostic_superclass']:
            row[sc] = 1
        for sc in row['diagnostic_subclass']:
            row[sc] = 1
        for sc in row['rhythms']:
            row[sc] = 1
            
        return row

def get_data_by_folds(folds, x, y, update_cols, feature_cols):
    assert len(folds)  > 0, '# of provided folds should longer than 1'
    #print(y.strat_fold)
    filt = np.isin(y.strat_fold.values, folds)
    x_selected = x[filt]
    y_selected = y[filt]
    
    for sc in update_cols:
        y_selected[sc] = 0
        
    cls_updt = ClassUpdate(update_cols)
    
    y_selected = y_selected.apply(cls_updt, axis=1)
    
    return x_selected, y_selected[list(feature_cols)+list(update_cols)+['strat_fold']]

In [25]:
x_all, y_all = get_data_by_folds(np.arange(1, 11), X_labelled, Y_labelled, update_cols, meta_cols)

In [None]:
# all_superclass = pd.Series(np.concatenate(Y_labelled['diagnostic_superclass'].values))
# all_subclass = pd.Series(np.concatenate(Y_labelled['diagnostic_subclass'].values))

In [None]:
# ohe_superclass=pd.get_dummies(all_superclass)
# ohe_subclass=pd.get_dummies(all_subclass)
# print(len(ohe_superclass))
# # Y_single_class=Y_single_class.join(ohe_superclass)
# # Y_single_class=Y_single_class.join(ohe_subclass)
# # # Y_reduced

In [35]:
# Shuffling the data
y_s=y_all.reset_index(drop=True)
y_shuffle=y_s.sample(frac=1)
x_shuffle=x_all[y_shuffle.index]

In [36]:
from scipy.signal import savgol_filter
def norm(x):
    return savgol_filter((x-min(x))/(max(x)-min(x)), 5, 3)

In [37]:
for i in range(len(x_shuffle)):
    for j in range(12):
        x=x_shuffle[i,:,j]
        x_shuffle[i,:,j]=norm(x)

In [None]:
X_all_diagnostics = x_all[:,:,1]

In [None]:
X_4000_random_not_CRBBB=x_shuffle[:4000,:,:].reshape(4000,2400)

In [None]:
np.savetxt('X_all_diagnostics.csv', X_all_diagnostics)

In [None]:
y_shuffle.to_csv('Y_all_diagnostics.csv', encoding='utf-8', index=False)

In [None]:
y_shuffle[['NORM', 'CD']]

In [38]:
Y_CRBBB=y_shuffle[y_shuffle.sub_CRBBB==1]
X_CRBBB=x_shuffle[np.where(y_shuffle.sub_CRBBB==1)]


In [None]:
Y_CRBBB_not=y_shuffle[y_shuffle.sub_CRBBB==0][:10000]
X_CRBBB_not=x_shuffle[np.where(y_shuffle.sub_CRBBB==0)][:10000]

In [None]:
X_CRBBB_not.shape

In [None]:
np.savetxt('X_CRBBB_not.csv', X_CRBBB_not.reshape(10000,2400))

In [None]:
X_CRBBB_reshaped=X_CRBBB.reshape(X_CRBBB.shape[0], 2400)
np.savetxt('X_CRBBB.csv', X_CRBBB_reshaped)

In [None]:
for i in range(20):
    r=np.random.randint(200)
    plt.plot(X_norm[r,:,11])
    plt.plot(X_CRBBB[r,:,11])
    plt.grid()
    plt.xlabel('samples')
    plt.ylabel('mV/Lead 1')
    plt.show()

In [None]:
for i in range(10):
    plt.plot(X_CRBBB[i,:,11])
    plt.xlabel('samples')
    plt.ylabel('mV/Lead 1')
    plt.show()

In [None]:
# plot(X_CRBBB, Y_CRBBB, 1, True, True)

In [117]:
# How much data is there for each class?
super_nums=y_all[superclass_cols].sum(axis=0)
# print(super_nums)

sub_nums=y_all[subclass_cols].sum(axis=0)
print(sub_nums.sort_values(ascending=False))



rhythms_no=y_all[rhythms_cols].sum(axis=0)
# print(rhythms_no).sort()

sub_NORM         51425
sub_STTC         14793
sub_LAFB/LPFB    13011
sub_AMI          12697
sub_IRBBB         7905
sub_IMI           7530
sub_ISC_          7222
sub_LVH           6623
sub_IVCD          5743
sub__AVB          5635
sub_ISCA          5000
sub_NST_          4764
sub_CRBBB         3635
sub_CLBBB         2992
sub_LAO/LAE       2102
sub_ISCI          1630
sub_RAO/RAE        871
sub_ILBBB          597
sub_LMI            407
sub_RVH            249
sub_WPW            200
sub_SEHYP          151
sub_PMI             52
dtype: int64


### We are now ready to split the data into a format suitable for CNN's: 

In [29]:
def data_splitter(X, Y, class_list, N):
    
    Y_class=Y[class_list]
    y_cat= Y_class[(Y_class[class_list[0]] == 1)]
    x_cat= X[np.where((Y_class[class_list[0]] == 1))]
    random_index=random.sample(range(0, len(y_cat)), N)
    
    x_cat=x_cat[random_index]
    y_cat=y_cat.iloc[random_index]
    
    for i in range(1, len(class_list)):
        
        y_temp= Y_class[(Y_class[class_list[i]] == 1)]
        x_temp= X[np.where((Y_class[class_list[i]] == 1))]
        
        n_positives=len(y_temp)
        n_instances=y_cat[(y_cat[class_list[i]] == 1)].sum(axis=0)
#         print(class_list[i], n_instances[i])
        nn=int(N-n_instances[i])
        if nn<0 or N>n_positives:
            nn=n_positives
        print(nn)
        
        random_index=random.sample(range(0, len(y_temp)), nn)
        x_temp=x_temp[random_index]
        y_temp=y_temp.iloc[random_index]          
                  
        y_cat=pd.concat((y_cat, y_temp), axis=0)
        x_cat=np.concatenate((x_cat, x_temp), axis=0)

    y_cat=y_cat[class_list]
    
    
    x_unique=np.unique(x_cat, axis=0)
    index=np.unique(x_cat, axis=0, return_index=True)[1]
    y_unique=y_cat.iloc[index]
#     print(y_cat.shape, y_unique.shape)
    print(y_cat.sum())
    print(y_unique.sum())
    
    X_train, X_test, Y_train, Y_test = train_test_split(x_cat, y_cat, test_size=0.2, shuffle=True)
    return X_train, X_test, Y_train, Y_test
    

In [79]:
def data_splitter1(X, Y, class_list, N):
    
    Y_class=Y[class_list]
    y_cat= Y_class[(Y_class[class_list[0]] == 1)]
    x_cat= X[np.where((Y_class[class_list[0]] == 1))]
    if len(y_cat)<N:
        n=len(y_cat)
    else:
        n=N
    random_index=random.sample(range(0, len(y_cat)), n)
    
    x_cat=x_cat[random_index]
    y_cat=y_cat.iloc[random_index]
    
    for i in range(1, len(class_list)):
        
        y_temp= Y_class[(Y_class[class_list[i]] == 1)]
        x_temp= X[np.where((Y_class[class_list[i]] == 1))]
        
        n_positives=len(y_temp)
        n_instances=y_cat[(y_cat[class_list[i]] == 1)].sum(axis=0)
#         print(class_list[i], n_instances[i])
        nn=int(N-n_instances[i])
        if nn<0 or N>n_positives:
            nn=n_positives
        print(nn)
        
        random_index=random.sample(range(0, len(y_temp)), nn)
        x_temp=x_temp[random_index]
        y_temp=y_temp.iloc[random_index]          
                  
        y_cat=pd.concat((y_cat, y_temp), axis=0)
        x_cat=np.concatenate((x_cat, x_temp), axis=0)

    y_cat=y_cat[class_list]
    
    
    x_unique=np.unique(x_cat, axis=0)
    index=np.unique(x_cat, axis=0, return_index=True)[1]
    y_unique=y_cat.iloc[index]
#     print(y_cat.shape, y_unique.shape)
    print(y_cat.sum())
    print(y_unique.sum())
    
    X_train, X_test, Y_train, Y_test = train_test_split(x_unique, y_unique, test_size=0.2, shuffle=True)
    return X_train, X_test, Y_train, Y_test

In [80]:
classes=['sub_NORM', 'sub_LAFB/LPFB', 'sub_AMI', 'sub_STTC', 'sub_LVH', 'sub_IVCD', 'sub_IRBBB', 'sub__AVB', 'sub_ISCA', 'sub_IMI', 'sub_ISC_', 'sub_NST_']
classes1=['sub_LMI', 'sub_RAO/RAE', 'sub_ILBBB', 'sub_LAO/LAE', 'sub_WPW', 'sub_ISCI', 'sub__AVB', 'sub_ISC_', 'sub_IVCD', 'sub_ISCA','sub_CRBBB', 'sub_NST_', 'sub_CLBBB', 'sub_LVH', 'sub_IRBBB', 'sub_LAFB/LPFB', 'sub_IMI', 'sub_AMI', 'sub_STTC', 'sub_NORM'] 
classes2=['NORM','HYP','MI', 'CD', 'STTC']
classes3=['sub_NORM', 'sub_AMI']
X_train, X_test, Y_train, Y_test=data_splitter1(x_shuffle, y_shuffle, classes3, 1000)
# test=data_splitter_3(x_all, y_all,'sub_NORM', 'sub_AMI','sub_IRBBB',1000)



1000
sub_NORM    1000
sub_AMI     1000
dtype: int64
sub_NORM    1000
sub_AMI     1000
dtype: int64


In [50]:
X_20class, Y_20class =data_splitter1(x_shuffle, y_shuffle, classes1, 10000)

871
597
2102
200
1630
5635
7222
5743
5000
3635
4764
2992
6623
7905
2699
7530
12697
6022
9093
sub_LMI           1004
sub_RAO/RAE       1488
sub_ILBBB         1456
sub_LAO/LAE       5256
sub_WPW            233
sub_ISCI          2880
sub__AVB         11884
sub_ISC_         16207
sub_IVCD         10926
sub_ISCA          8580
sub_CRBBB         5641
sub_NST_          5527
sub_CLBBB         4055
sub_LVH          15436
sub_IRBBB        10811
sub_LAFB/LPFB    14271
sub_IMI          14810
sub_AMI          24634
sub_STTC         10002
sub_NORM         10000
dtype: int64
sub_LMI            407
sub_RAO/RAE        871
sub_ILBBB          597
sub_LAO/LAE       2102
sub_WPW            200
sub_ISCI          1630
sub__AVB          5635
sub_ISC_          7213
sub_IVCD          5736
sub_ISCA          4999
sub_CRBBB         3635
sub_NST_          4751
sub_CLBBB         2988
sub_LVH           6619
sub_IRBBB         7904
sub_LAFB/LPFB     8952
sub_IMI           7524
sub_AMI          12697
sub_STTC          79

In [53]:
np.savetxt('X_20class.csv', X_20class.reshape(X_20class.shape[0], 2400))
Y_20class.to_csv('Y_20class.csv', encoding='utf-8', index=False)

In [66]:
x=Y_20class.sum().sum()/20
print(x)

5114.45


In [None]:
np.savetxt('X_train.csv', X_train.reshape(X_train.shape[0], 2400))
np.savetxt('X_test.csv', X_test.reshape(X_test.shape[0], 2400))


In [None]:
Y_train.to_csv('Y_train.csv', encoding='utf-8', index=False)
Y_test.to_csv('Y_test.csv', encoding='utf-8', index=False)

In [None]:
Y_

In [None]:
# for i in range(6):
#     plot(X_train, Y_train, 5, True, True)

In [25]:
# Y_norm=y_shuffle[y_shuffle.sub_NORM==1]
# X_norm=x_shuffle[np.where(y_shuffle.sub_NORM==1)]

# Y_not_norm=y_shuffle[y_shuffle.sub_NORM==0]
# X_not_norm=x_shuffle[np.where(y_shuffle.sub_NORM==0)]

# print(Y_norm.shape,Y_not_norm.shape)

norm_ohe=pd.get_dummies(y_shuffle.NORM)
norm_ohe.columns=['Not_norm', 'Norm']
X_train_b, X_test_b, Y_train_b, Y_test_b = train_test_split(x_shuffle, norm_ohe, test_size=0.2, random_state=69)

Y_norm=y_shuffle[y_shuffle.NORM==1]
X_norm=x_shuffle[np.where(y_shuffle.NORM==1)]

Y_not_norm=y_shuffle[y_shuffle.NORM==0]
X_not_norm=x_shuffle[np.where(y_shuffle.NORM==0)]
print(X_norm.shape,Y_norm.shape)



(51425, 200, 12) (51425, 48)


In [None]:
Y_norm=y_shuffle[y_shuffle.NORM==1]
X_norm=x_shuffle[np.where(y_shuffle.NORM==1)]

In [None]:
X_norm_data_12=X_norm[:4000,:,:]
X_arr_reshaped = X_norm_data_12.reshape(X_norm_data_12.shape[0], -1)
np.savetxt('X_norm_12lead.csv', X_arr_reshaped)

In [None]:
Y_not_norm=y_shuffle[y_shuffle.NORM==0]
X_not_norm=x_shuffle[np.where(y_shuffle.NORM==0)]

In [None]:
norm_ohe=pd.get_dummies(y_shuffle.NORM)
norm_ohe.columns=['Not_norm', 'Norm']
y_shuffle=y_shuffle.join(norm_ohe)

In [None]:
Y_n = norm

In [None]:
print(norm_ohe.sum())

In [None]:
# Can a CNN determine gender from ECG?
sex_ohe=pd.get_dummies(y_shuffle['sex'], columns=['M', 'F'])
X_train_sex, X_test_sex, Y_train_sex, Y_test_sex=train_test_split(x_shuffle, sex_ohe, test_size=0.2, random_state=3)

print(sex_ohe.sum())


In [None]:
print(Y_norm.age)

In [None]:
ages=np.array(Y_norm.age)
# print(ages)
Y_ages = ages[~np.isnan(ages)]
X_ages = X_norm[np.where(ages[~np.isnan(ages)])]
# print(ages.shape, Y_ages.shape, Y_ages.shape)
for i in range(len(Y_ages)):
    if Y_ages[i]<40:
        Y_ages[i]=0
    elif 40<=Y_ages[i]<50:
        Y_ages[i]=1
    elif 50<=Y_ages[i]<60:
        Y_ages[i]=2
    elif 60<=Y_ages[i]<70:
        Y_ages[i]=3
    elif Y_ages[i]>=70:
        Y_ages[i]=4

    
   

In [None]:
age_ohe=pd.get_dummies(Y_ages)
print(age_ohe.sum(axis=0))
X_train_age, X_test_age, Y_train_age, Y_test_age=train_test_split(X_ages, age_ohe, test_size=0.2, random_state=58)
print(X_train_age.shape, Y_train_age.shape)

# X_t, Y_t=X_ages[1000:2000], age_ohe[1000:2000]


## Lets test some 1D CNN's:

In [None]:
# CNN example (basic)
model_basic = Sequential()
model_basic.add(Convolution1D(100, 5, activation='relu', input_shape=(200,12)))
model_basic.add(Convolution1D(100, 10, activation='relu'))
model_basic.add(MaxPooling1D(3))
model_basic.add(Convolution1D(100, 10, activation='relu'))
model_basic.add(Convolution1D(160, 10, activation='relu'))
model_basic.add(GlobalAveragePooling1D())
model_basic.add(Dropout(0.2))
# model_basic.add(Flatten())
model_basic.add(Dense(100, activation='relu'))
model_basic.add(Dense(5, activation='sigmoid'))
# print(model.summary())
model_basic.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
    name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                tf.keras.metrics.AUC(
    num_thresholds=200,
    curve="ROC",
    summation_method="interpolation",
    name="AUC",
    dtype=None,
    thresholds=None,
    multi_label=True,
    label_weights=None,
)])

In [None]:
Model_basic=model_basic.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=50)

In [None]:
# CNN example for single class samples
model = Sequential()

# Convolutional block 1
model.add(Convolution1D(32, 3, activation='relu', input_shape=(200,12)))
model.add(tf.keras.layers.BatchNormalization())
model.add(MaxPooling1D(2))
model.add(Convolution1D(32, 3, activation='relu'))
model.add(MaxPooling1D(2))
model.add(Dropout(0.2))

# Convolutional block 2
model.add(Convolution1D(64, 5, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(MaxPooling1D(2))
model.add(Convolution1D(64, 5, activation='relu'))
model.add(MaxPooling1D(2))

# Convolutional block 3
model.add(Convolution1D(128, 7, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(MaxPooling1D(2))
# model.add(Convolution1D(128, 7, activation='relu'))
# model.add(MaxPooling1D(2))

model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(20, activation='relu'))
model.add(Dense(5, activation='sigmoid'))

model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
    name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                tf.keras.metrics.AUC(
    num_thresholds=200,
    curve="ROC",
    summation_method="interpolation",
    name="AUC",
    dtype=None,
    thresholds=None,
    multi_label=True,
    label_weights=None,
)])

In [None]:
Model=model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=50)

In [None]:
# CNN example for multiclass samples
# model_1 = Sequential()

# # Convolutional block 1
# model_1.add(Convolution1D(32, 3, activation='relu', input_shape=(200,12)))
# model_1.add(tf.keras.layers.BatchNormalization())
# model_1.add(MaxPooling1D(2))
# model_1.add(Convolution1D(32, 3, activation='relu'))
# model_1.add(MaxPooling1D(2))
# model_1.add(Dropout(0.3))

# # Convolutional block 2
# model_1.add(Convolution1D(64, 5, activation='relu'))
# model_1.add(MaxPooling1D(2))
# model_1.add(Convolution1D(64, 5, activation='relu'))
# model_1.add(MaxPooling1D(2))

# # Convolutional block 3
# model_1.add(Convolution1D(128, 7, activation='relu'))
# model_1.add(MaxPooling1D(2))
# # model.add(Convolution1D(128, 7, activation='relu'))
# # model.add(MaxPooling1D(2))

# model_1.add(Flatten())
# model_1.add(Dropout(0.3))
# model_1.add(Dense(64, activation='relu'))
# model_1.add(Dropout(0.3))
# model_1.add(Dense(20, activation='relu'))
# model_1.add(Dense(4, activation='sigmoid'))

# # print(model_1.summary())
# model_1.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# epoch_no=50
# Model_1=model_1.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epoch_no)

## How does amount of data affect accuracy?

In [None]:
# LeakyRelu model
model_1= Sequential()
model_1.add(Convolution1D(32,3, input_shape=(200,12)))
model_1.add(LeakyReLU(alpha=0.01))
model_1.add(MaxPooling1D(2))
model_1.add(Dropout(0.25))

model_1.add(Convolution1D(64,3))
model_1.add(LeakyReLU(alpha=0.01))
model_1.add(MaxPooling1D(2))
model_1.add(Dropout(0.25))

model_1.add(Flatten())
model_1.add(Dense(256))
model_1.add(LeakyReLU(alpha=0.01))
model_1.add(Dropout(0.25))
model_1.add(Dense(2, activation='sigmoid'))
model_1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
# print(model_2.summary())

In [106]:
# LeakyRelu model
model_2 = Sequential()
model_2.add(Convolution1D(32,1, input_shape=(200,12)))
model_2.add(LeakyReLU(alpha=0.01))
model_2.add(MaxPooling1D(2))
model_2.add(Dropout(0.1))

model_2.add(Convolution1D(64,1))
model_2.add(LeakyReLU(alpha=0.01))
model_2.add(MaxPooling1D(2))
model_2.add(Dropout(0.1))

model_2.add(Flatten())
model_2.add(Dense(256))
model_2.add(LeakyReLU(alpha=0.01))
model_2.add(Dropout(0.1))
model_2.add(Dense(2, activation='sigmoid'))
model_2.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
    name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                tf.keras.metrics.AUC(
    num_thresholds=200,
    curve="ROC",
    summation_method="interpolation",
    name="AUC",
    dtype=None,
    thresholds=None,
    multi_label=True,
    label_weights=None,
)])

In [108]:
# sub_STTC         14793
# sub_NST_          4764
# sub_LVH           6623
# sub_LAFB/LPFB 

c=['sub_NORM', 'sub_NST_']
X_train, X_test, Y_train, Y_test=data_splitter(x_shuffle, y_shuffle, c, 5000)
epoch_no=50
Model_2=model_2.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epoch_no)


4764
sub_NORM    5000
sub_NST_    4764
dtype: int64
sub_NORM    5000
sub_NST_    4751
dtype: int64
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# LeakyRelu model
model_3 = Sequential()
model_3.add(Convolution1D(32,1, input_shape=(200,12)))
model_3.add(LeakyReLU(alpha=0.01))
model_3.add(MaxPooling1D(2))
model_3.add(Dropout(0.1))

model_3.add(Convolution1D(64,3))
model_3.add(LeakyReLU(alpha=0.01))
model_3.add(MaxPooling1D(2))
model_3.add(Dropout(0.1))

model_3.add(Flatten())
model_3.add(Dense(55))
model_3.add(LeakyReLU(alpha=0.01))
model_3.add(Dense(5, activation='sigmoid'))
model_2.add(Dropout(0.1))
optimizer =tf.keras.optimizers.Adam(clipvalue=0.5)
model_3.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# print(model_3.summary())

In [None]:
epoch_no=50
Model_3=model_3.fit(X_train_b, Y_train_b, validation_data=(X_test_b, Y_test_b), epochs=epoch_no, batch_size=128)

In [None]:
# epoch_no=20
# Model_3=model_3.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epoch_no)

In [None]:
accuracy_1, val_accuracy_1=Model_1.history['accuracy'], Model_1.history['val_accuracy']
accuracy_2, val_accuracy_2=Model_2.history['accuracy'], Model_2.history['val_accuracy']
accuracy_3, val_accuracy_3=Model_3.history['accuracy'], Model_3.history['val_accuracy']
# plt.figure()
# plt.plot(range(epoch_no), accuracy_2, label='training 2', color='g')
# plt.plot(range(epoch_no), accuracy_3, label='training accuracy', color='b')
plt.plot(range(epoch_no), val_accuracy_2, label='dropout=0.10', linestyle='-', color='blue')
plt.plot(range(epoch_no), val_accuracy_1, label='dropout=0.25', linestyle='-', color='red')
plt.plot(range(epoch_no), val_accuracy_3, label='dropout=0.40', linestyle='-', color='green')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('Variation of test accuracy by dropout rate in a binary classification task')
plt.show()

## Fine tuning parameters: Optimizer, Learning rate, dropout, LeakyReLu alpha - will only be using 1000 data points to start to reduce time taken to run all epochs

In [None]:
# Learning rate
def create_model(learn_rate=0.01):
    # LeakyRelu model
    model_3 = Sequential()
    model_3.add(Convolution1D(32,3, input_shape=(200,12)))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(0.25))

    model_3.add(Convolution1D(64,3))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(0.25))

    model_3.add(Flatten())
    model_3.add(Dense(256))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(Dense(2, activation='softmax'))
    
    optimizer =tf.keras.optimizers.Adam(learning_rate=learn_rate)
    model_3.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model_3

In [None]:
model_3 = KerasClassifier(build_fn=create_model, epochs=10, batch_size=10, verbose=1)
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
param_grid = dict(learn_rate=learn_rate)
grid = GridSearchCV(estimator=model_3, param_grid=param_grid, n_jobs=1, cv=3, verbose=2)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))

In [None]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("accuracy - %f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Kernal initializer
def create_model(init_mode='uniform'):
    # LeakyRelu model
    model_3 = Sequential()
    model_3.add(Convolution1D(32,3, input_shape=(200,12)))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(0.25))

    model_3.add(Convolution1D(64,3))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(0.25))

    model_3.add(Flatten())
    model_3.add(Dense(256, kernel_initializer=init_mode))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(Dense(2, kernel_initializer=init_mode, activation='sigmoid'))
    
    optimizer =tf.keras.optimizers.Adam(learning_rate=0.001)
    model_3.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model_3

In [None]:
model_3 = KerasClassifier(build_fn=create_model, epochs=20, batch_size=10, verbose=1)
init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
param_grid = dict(init_mode=init_mode)
grid = GridSearchCV(estimator=model_3, param_grid=param_grid, n_jobs=1, cv=2, verbose=2)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))

In [None]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("accuracy - %f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Dropout
def create_model(dropout=0):
    # LeakyRelu model
    model_3 = Sequential()
    model_3.add(Convolution1D(32,3, input_shape=(200,12)))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(dropout))

    model_3.add(Convolution1D(64,3))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(dropout))

    model_3.add(Flatten())
    model_3.add(Dense(256))
    model_3.add(LeakyReLU(alpha=0.01))
    model_3.add(Dropout(dropout))
    model_3.add(Dense(2, activation='sigmoid'))
    
    optimizer =tf.keras.optimizers.Adam(learning_rate=0.001)
    model_3.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model_3

In [None]:
model_3 = KerasClassifier(build_fn=create_model, epochs=50, verbose=1)
dropout = [0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3]
param_grid = dict(dropout=dropout)
grid = GridSearchCV(estimator=model_3, param_grid=param_grid, n_jobs=1, cv=4, verbose=2)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))

In [None]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("accuracy - %f (%f) with: %r" % (mean, stdev, param))

In [None]:
# alpha
def create_model(alpha=0):
    # LeakyRelu model
    model_3 = Sequential()
    model_3.add(Convolution1D(32,3, input_shape=(200,12)))
    model_3.add(LeakyReLU(alpha=alpha))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(0.1))

    model_3.add(Convolution1D(64,3))
    model_3.add(LeakyReLU(alpha=alpha))
    model_3.add(MaxPooling1D(2))
    model_3.add(Dropout(0.1))

    model_3.add(Flatten())
    model_3.add(Dense(256))
    model_3.add(LeakyReLU(alpha=alpha))
    model_3.add(Dropout(0.1))
    model_3.add(Dense(2, activation='sigmoid'))
    
    model_3.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    
    return model_3


In [None]:
model_3 = KerasClassifier(build_fn=create_model, epochs=40, verbose=1)
alpha = [0.001, 0.003, 0.007, 0.01, 0.03, 0.07, 0.1]
param_grid = dict(alpha=alpha)
grid = GridSearchCV(estimator=model_3, param_grid=param_grid, n_jobs=1, cv=3, verbose=2)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))

In [None]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("accuracy - %f (%f) with: %r" % (mean, stdev, param))

In [None]:
accuracy, val_accuracy=Model.history['accuracy'], Model.history['val_accuracy']

In [None]:
accuracy, val_accuracy=Model_2.history['accuracy'], Model_2.history['val_accuracy']
plt.figure()
plt.plot(range(epoch_no), accuracy, label='training')
plt.plot(range(epoch_no), val_accuracy, label='validation', linestyle='--')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.show()

In [None]:
loss, val_loss=Model_2.history['loss'], Model_2.history['val_loss']
plt.figure()
plt.plot(range(epoch_no), loss, label='training')
plt.plot(range(epoch_no), val_loss, label='validation', linestyle='--')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend(loc='upper right')
plt.show()

In [None]:

score= model.evaluate(X_test, Y_test)
# print(X_test.shape, Y_test.shape, Y_train.shape)



In [None]:
predict_x=model.predict(X_test) 
Y_pred=np.argmax(predict_x,axis=1)
print(Y_pred)


In [None]:
t=np.array(Y_test)
true=np.argmax(t, axis=1)
# for i in range(len(t)):
#     Y_test['int']=t.iloc[i].argmax()


In [None]:
cm = confusion_matrix(true, Y_pred, normalize='true')
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=classes)

disp.plot(cmap=plt.cm.Blues)
plt.show()
# print(classes)

In [None]:
alexNet_model=Sequential()

alexNet_model.add(Convolution1D(filters=96, kernel_size=11, strides=4, input_shape=(200,12)))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))
alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model.add(Convolution1D(filters=256, kernel_size=5, padding='same'))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))
alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model.add(Convolution1D(filters=384, padding='same', kernel_size=3))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))
alexNet_model.add(Convolution1D(filters=384, kernel_size=3))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))

alexNet_model.add(Convolution1D(filters=256, kernel_size=3))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))
alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model.add(GlobalAveragePooling1D())
alexNet_model.add(Dense(128, activation='relu'))
alexNet_model.add(Dropout(0.2))
alexNet_model.add(Dense(128, activation='relu'))
alexNet_model.add(Dropout(0.2))
alexNet_model.add(Dense(5, activation='sigmoid'))

In [None]:
 alexNet_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
        name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                    tf.keras.metrics.AUC(
        num_thresholds=200,
        curve="ROC",
        summation_method="interpolation",
        name="AUC",
        dtype=None,
        thresholds=None,
        multi_label=True,
        label_weights=None,
    )])

In [None]:
epoch_no=50
alex_model=alexNet_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epoch_no)

In [None]:
# LeakyRelu model
model_2 = Sequential()
model_2.add(Convolution1D(32,3, input_shape=(200,12)))
model_2.add(LeakyReLU(alpha=0.01))
model_2.add(MaxPooling1D(2))
model_2.add(Dropout(0.1))

model_2.add(Convolution1D(64,3))
model_2.add(LeakyReLU(alpha=0.01))
model_2.add(MaxPooling1D(2))
model_2.add(Dropout(0.1))

model_2.add(Flatten())
model_2.add(Dense(256))
model_2.add(LeakyReLU(alpha=0.01))
model_2.add(Dropout(0.1))
model_2.add(Dense(4, activation='sigmoid'))
# print(model_2.summary())

In [None]:
model_2.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
        name='accuracy', dtype=None),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                    tf.keras.metrics.AUC(
        num_thresholds=200,
        curve="ROC",
        summation_method="interpolation",
        name="AUC",
        dtype=None,
        thresholds=None,
        multi_label=True,
        label_weights=None,
    )])

In [None]:
epoch_no=50
Model_2=model_2.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epoch_no)

In [None]:
accuracy_alex, val_acc_alex=alex_model_1.history['accuracy'], alex_model_1.history['val_accuracy']
# accuracy, val_accuracy=Model_2.history['accuracy'], Model_2.history['val_accuracy']
plt.figure()
plt.plot(range(1, epoch_no+1), val_acc_alex, label='AlexNet')
# plt.plot(range(1, epoch_no+1), val_accuracy, label='LeakyRelu')
plt.grid()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
plt.title('5 Class Classification')
plt.show()

In [None]:
predict_x=alexNet_model.predict(X_test) 
print(predict_x)
# predict_x_2=model_2.predict(X_test) 
# print(predict_x_2)

In [None]:
Y_pred=predict_x
for i in range(Y_pred.shape[0]):
    for j in range(Y_pred.shape[1]):
        if Y_pred[i,j]>0.5:
            Y_pred[i,j]=1
        else:
            Y_pred[i,j]=0
            
# Y_pred_2=predict_x_2
# for i in range(Y_pred_2.shape[0]):
#     for j in range(Y_pred_2.shape[1]):
#         if Y_pred_2[i,j]>0.5:
#             Y_pred_2[i,j]=1
#         else:
#             Y_pred_2[i,j]=0
                
print(Y_pred[200:210]) 
print(Y_test[200:210])
# print(Y_pred_2)

In [None]:
# Recall
counter=0
n_labels=0
Y_test_arr=np.array(Y_test)
total=Y_pred.shape[0]*Y_pred.shape[1]
for i in range(Y_pred.shape[0]):
    for j in range(Y_pred.shape[1]):
#         if Y_test_arr[i,j]==1:
        n_labels+=1
        if Y_pred[i,j]==Y_test_arr[i,j]:
            counter+=1

print(100*counter/n_labels)        
print(n_labels)

In [None]:
counter=0
n_labels=0
Y_test_arr=np.array(Y_test)
total=Y_pred.shape[0]*Y_pred.shape[1]
for i in range(Y_pred.shape[0]):
    for j in range(Y_pred.shape[1]):
        if Y_test_arr[i,j]==0:
            n_labels+=1
            if Y_pred[i,j]==Y_test_arr[i,j]:
                counter+=1
            
print(100*counter/n_labels)
print(n_labels)

# Grid search for the AlexNet CNN

In [None]:
 def alex_model(dropout_rate=0):
    alexNet_model=Sequential()

    alexNet_model.add(Convolution1D(filters=96, kernel_size=11, strides=4, input_shape=(200,12)))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

    alexNet_model.add(Convolution1D(filters=256, kernel_size=5, padding='same'))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

    alexNet_model.add(Convolution1D(filters=384, padding='same', kernel_size=3))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(Convolution1D(filters=384, kernel_size=3))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(Convolution1D(filters=256, kernel_size=3))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

    alexNet_model.add(GlobalAveragePooling1D())
    alexNet_model.add(Dense(128, activation='relu'))
    alexNet_model.add(Dropout(dropout_rate))
    alexNet_model.add(Dense(128, activation='relu'))
    alexNet_model.add(Dropout(dropout_rate))
    alexNet_model.add(Dense(2, activation='softmax'))
    
    alexNet_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
        name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                    tf.keras.metrics.AUC(
        num_thresholds=200,
        curve="ROC",
        summation_method="interpolation",
        name="AUC",
        dtype=None,
        thresholds=None,
        multi_label=True,
        label_weights=None,
    )])
    
    return alexNet_model

In [None]:
X_train, X_test, Y_train, Y_test=data_splitter(x_shuffle, y_shuffle, ['sub_IRBBB', 'sub_AMI'], 5000)

In [None]:
alex_drop = KerasClassifier(build_fn=alex_model, epochs=40, verbose=1)
dropout_rate = [0, 0.1, 0.2, 0.3]
param_grid = dict(dropout_rate=dropout_rate)
grid = GridSearchCV(estimator=alex_drop, param_grid=param_grid, n_jobs=1, cv=2, verbose=2)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))

In [None]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("accuracy - %f (%f) with: %r" % (mean, stdev, param))

In [None]:
def alex_model_op(optimizer='adam'):
    alexNet_model=Sequential()

    alexNet_model.add(Convolution1D(filters=96, kernel_size=11, strides=4, input_shape=(200,12)))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

    alexNet_model.add(Convolution1D(filters=256, kernel_size=5, padding='same'))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

    alexNet_model.add(Convolution1D(filters=384, padding='same', kernel_size=3))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(Convolution1D(filters=384, kernel_size=3))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(Convolution1D(filters=256, kernel_size=3))
    alexNet_model.add(tf.keras.layers.BatchNormalization())
    alexNet_model.add(Activation('relu'))
    alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

    alexNet_model.add(GlobalAveragePooling1D())
    alexNet_model.add(Dense(128, activation='relu'))
    alexNet_model.add(Dropout(0.2))
    alexNet_model.add(Dense(128, activation='relu'))
    alexNet_model.add(Dropout(0.2))
    alexNet_model.add(Dense(5, activation='softmax'))
    
    alexNet_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=optimizer, metrics=[tf.keras.metrics.BinaryAccuracy(
        name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                    tf.keras.metrics.AUC(
        num_thresholds=200,
        curve="ROC",
        summation_method="interpolation",
        name="AUC",
        dtype=None,
        thresholds=None,
        multi_label=True,
        label_weights=None,
    )])
    
    return alexNet_model

In [None]:
alex_opt = KerasClassifier(build_fn=alex_model_op, epochs=40, verbose=1)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=alex_opt, param_grid=param_grid, n_jobs=1, cv=2, verbose=2)
grid_result = grid.fit(X_train, Y_train, validation_data=(X_test, Y_test))

In [None]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("accuracy - %f (%f) with: %r" % (mean, stdev, param))

In [None]:
alexNet_model=Sequential()

alexNet_model.add(Convolution1D(filters=96, kernel_size=11, strides=4, input_shape=(200,12)))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))
alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model.add(Convolution1D(filters=256, kernel_size=5, padding='same'))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))
alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model.add(Convolution1D(filters=384, padding='same', kernel_size=3))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))

alexNet_model.add(Convolution1D(filters=384, kernel_size=3))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))

alexNet_model.add(Convolution1D(filters=256, kernel_size=3))
alexNet_model.add(tf.keras.layers.BatchNormalization())
alexNet_model.add(Activation('relu'))

alexNet_model.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model.add(GlobalAveragePooling1D())
alexNet_model.add(Dense(128, activation='relu'))
alexNet_model.add(Dropout(0.1))
alexNet_model.add(Dense(128, activation='relu'))
alexNet_model.add(Dropout(0.1))
alexNet_model.add(Dense(5, activation='sigmoid'))

alexNet_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
    name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                tf.keras.metrics.AUC(
    num_thresholds=200,
    curve="ROC",
    summation_method="interpolation",
    name="AUC",
    dtype=None,
    thresholds=None,
    multi_label=True,
    label_weights=None,
)])

In [None]:
epoch_no=70
alex_model_1=alexNet_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epoch_no)

In [None]:
alexNet_model_2=Sequential()

alexNet_model_2.add(Convolution1D(filters=96, kernel_size=11, strides=4, input_shape=(200,12)))
alexNet_model_2.add(tf.keras.layers.BatchNormalization())
alexNet_model_2.add(Activation('relu'))
alexNet_model_2.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model_2.add(Convolution1D(filters=256, kernel_size=5, padding='same'))
alexNet_model_2.add(tf.keras.layers.BatchNormalization())
alexNet_model_2.add(Activation('relu'))
alexNet_model_2.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model_2.add(Convolution1D(filters=384, padding='same', kernel_size=3))
alexNet_model_2.add(tf.keras.layers.BatchNormalization())
alexNet_model_2.add(Activation('relu'))
alexNet_model_2.add(Convolution1D(filters=384, kernel_size=3))
alexNet_model_2.add(tf.keras.layers.BatchNormalization())
alexNet_model_2.add(Activation('relu'))
alexNet_model_2.add(Convolution1D(filters=256, kernel_size=3))
alexNet_model_2.add(tf.keras.layers.BatchNormalization())
alexNet_model_2.add(Activation('relu'))
alexNet_model_2.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

alexNet_model_2.add(GlobalAveragePooling1D())
alexNet_model_2.add(Dense(128, activation='relu'))
alexNet_model_2.add(Dropout(0.2))
alexNet_model_2.add(Dense(128, activation='relu'))
alexNet_model_2.add(Dropout(0.2))
alexNet_model_2.add(Dense(5, activation='sigmoid'))

alexNet_model_2.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adamax(), metrics=[tf.keras.metrics.BinaryAccuracy(
    name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                tf.keras.metrics.AUC(
    num_thresholds=200,
    curve="ROC",
    summation_method="interpolation",
    name="AUC",
    dtype=None,
    thresholds=None,
    multi_label=True,
    label_weights=None,
)])

In [None]:
epoch_no=70
alex_model_2=alexNet_model_2.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=epoch_no)

In [None]:
accuracy_1, val_accuracy_1=alex_model_1.history['accuracy'], alex_model_1.history['val_accuracy']
accuracy_2, val_accuracy_2=alex_model_2.history['accuracy'], alex_model_2.history['val_accuracy']
accuracy_3, val_accuracy_3=alex_model.history['accuracy'], alex_model.history['val_accuracy']

plt.plot(range(epoch_no), val_accuracy_2, label='Adamax', linestyle='-', color='blue')
plt.plot(range(epoch_no), val_accuracy_1, label='Adam', linestyle='-', color='red')
plt.plot(range(epoch_no), val_accuracy_3, label='Adam d=0.2', linestyle='-', color='green')
plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(loc='lower right')
# plt.title('Variation of test accuracy by dropout rate in a binary classification task')
plt.show()

# # Inception

In [None]:
def inception_block(prev_layer):
    
    conv1=Conv1D(filters = 64, kernel_size = 1, padding = 'same')(prev_layer)
    conv1=BatchNormalization()(conv1)
    conv1=Activation('relu')(conv1)
    
    conv3=Conv1D(filters = 64, kernel_size = 1, padding = 'same')(prev_layer)
    conv3=BatchNormalization()(conv3)
    conv3=Activation('relu')(conv3)
    conv3=Conv1D(filters = 64, kernel_size = 3, padding = 'same')(conv3)
    conv3=BatchNormalization()(conv3)
    conv3=Activation('relu')(conv3)
    
    conv5=Conv1D(filters = 64, kernel_size = 1, padding = 'same')(prev_layer)
    conv5=BatchNormalization()(conv5)
    conv5=Activation('relu')(conv5)
    conv5=Conv1D(filters = 64, kernel_size = 5, padding = 'same')(conv5)
    conv5=BatchNormalization()(conv5)
    conv5=Activation('relu')(conv5)
    
    pool= MaxPool1D(pool_size=3, strides=1, padding='same')(prev_layer)
    convmax=Conv1D(filters = 64, kernel_size = 1, padding = 'same')(pool)
    convmax=BatchNormalization()(convmax)
    convmax=Activation('relu')(convmax)
    
    layer_out = concatenate([conv1, conv3, conv5, convmax], axis=1)
    
    return layer_out

In [None]:
def inception_model(input_shape):
    X_input=Input(input_shape)
    
    X = ZeroPadding1D(padding=3)(X_input)
    
    X = Conv1D(filters = 64, kernel_size = 7, padding = 'same')(X)
    X = BatchNormalization()(X)
    X = Activation('relu')(X)
    X = MaxPool1D(pool_size=3, strides=2, padding='same')(X)
    
    X = Conv1D(filters = 64, kernel_size = 1, padding = 'same')(X)
    X = BatchNormalization()(X)
    X = Activation('relu')(X)
    
    X = inception_block(X)
    X = inception_block(X)
    
    X = MaxPool1D(pool_size=7, strides=2, padding='same')(X)
    
    X = GlobalAveragePooling1D()(X)
    X = Dense(5,activation='sigmoid')(X)
    
    incep_model = tf.keras.Model(inputs = X_input, outputs = X, name='Inception')
    
    return incep_model

In [None]:
inception_model = inception_model(input_shape = (200,12))

In [None]:
 inception_model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), metrics=[tf.keras.metrics.BinaryAccuracy(
        name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                    tf.keras.metrics.AUC(
        num_thresholds=200,
        curve="ROC",
        summation_method="interpolation",
        name="AUC",
        dtype=None,
        thresholds=None,
        multi_label=True,
        label_weights=None,
    )])

In [None]:

inception_model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=50)



In [None]:
model_4= tf.keras.Sequential()

model_4.add(layers.Input(shape=(200,12)))
#model.add(layers.Permute((2, 1)))

model_4.add(layers.Conv1D(filters=32, kernel_size=64, strides=1, padding='same'))
model_4.add(layers.LeakyReLU())

model_4.add(layers.Dropout(0.2))

model_4.add(layers.Conv1D(filters=64, kernel_size=64, strides=1, padding='same'))
#model.add(tf.keras.layers.BatchNormalization())
model_4.add(layers.LeakyReLU())
model_4.add(layers.Dropout(0.2))

model_4.add(layers.MaxPool1D(pool_size=2))

model_4.add(layers.Conv1D(filters=128, kernel_size=64, strides=1, padding='same'))
model_4.add(layers.LeakyReLU())
# model.add(tf.keras.layers.BatchNormalization())
model_4.add(layers.Dropout(0.2))

model_4.add(layers.Conv1D(filters=256, kernel_size=64, strides=1, padding='same'))
model_4.add(layers.LeakyReLU())
model_4.add(layers.Dropout(0.2))

model_4.add(layers.Conv1D(filters=512, kernel_size=64, strides=1, padding='same'))
model_4.add(layers.LeakyReLU())
model_4.add(layers.Dropout(0.2))



model_4.add(layers.MaxPool1D(pool_size=2))

model_4.add(layers.Flatten())

model_4.add(layers.Dense(2, activation='sigmoid'))

model_4.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(), metrics=[tf.keras.metrics.BinaryAccuracy(
    name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
                tf.keras.metrics.AUC(
    num_thresholds=200,
    curve="ROC",
    summation_method="interpolation",
    name="AUC",
    dtype=None,
    thresholds=None,
    multi_label=True,
    label_weights=None,
)])

In [None]:
model_4.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=50)

In [4]:

model = tf.keras.Sequential()

model.add(layers.Convolution1D(filters=96, kernel_size=11, strides=4, input_shape=(200,12)))
model.add(layers.BatchNormalization())
model.add(layers.LeakyReLU(alpha=0.001))

model.add(layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))

model.add(layers.Convolution1D(filters=256, kernel_size=5, padding='same'))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.LeakyReLU(alpha=0.001))

model.add(layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))

model.add(layers.Convolution1D(filters=384, padding='same', kernel_size=3))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.LeakyReLU(alpha=0.001))

model.add(layers.Convolution1D(filters=384, kernel_size=3))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.LeakyReLU(alpha=0.001))

model.add(layers.Convolution1D(filters=256, kernel_size=3))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.LeakyReLU(alpha=0.001))

model.add(layers.Convolution1D(filters=128, kernel_size=2, padding='same'))
model.add(tf.keras.layers.BatchNormalization())
model.add(layers.LeakyReLU(alpha=0.001))


model.add(layers.MaxPooling1D(pool_size=2, strides=2, padding='same'))
model.add(layers.GlobalAveragePooling1D())

model.add(layers.Dense(128))
model.add(layers.LeakyReLU(alpha=0.001))
model.add(layers.Dropout(0.1))

model.add(layers.Dense(128))
model.add(layers.LeakyReLU(alpha=0.001))
model.add(layers.Dropout(0.1))
model.add(layers.Dense(5, activation='sigmoid'))

model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adamax(learning_rate=0.0003), metrics=[tf.keras.metrics.BinaryAccuracy(
name='accuracy', dtype=None, threshold=0.5),tf.keras.metrics.Recall(name='Recall'),tf.keras.metrics.Precision(name='Precision'), 
            tf.keras.metrics.AUC(
num_thresholds=200,
curve="ROC",
summation_method="interpolation",
name="AUC",
dtype=None,
thresholds=None,
multi_label=True,
label_weights=None,
)])

print(model.summary())


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_6 (Conv1D)            (None, 48, 96)            12768     
_________________________________________________________________
batch_normalization_6 (Batch (None, 48, 96)            384       
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 48, 96)            0         
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 24, 96)            0         
_________________________________________________________________
conv1d_7 (Conv1D)            (None, 24, 256)           123136    
_________________________________________________________________
batch_normalization_7 (Batch (None, 24, 256)           1024      
_________________________________________________________________
leaky_re_lu_9 (LeakyReLU)    (None, 24, 256)          

In [23]:
Y_real = pd.read_csv('Y_10s_superclass.csv')
Y_real=np.array(Y_real)
Y_real=Y_real[:1000,:]

In [26]:
A=Y_real[:10,:]
print(A)

[[1 0 0 0 0]
 [0 1 0 0 0]
 [1 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [1 0 0 0 0]
 [1 0 0 0 0]
 [0 1 0 1 1]
 [1 0 0 0 0]
 [0 0 0 0 1]]


In [30]:
a=np.unique(A, axis=0)

In [31]:
print(a)

[[0 0 0 0 0]
 [0 0 0 0 1]
 [0 1 0 0 0]
 [0 1 0 1 1]
 [1 0 0 0 0]]


In [34]:
def random_labels(labels, batch_size):
    index=np.random.randint(labels.shape[0], size=batch_size)
    return labels[index,:]

In [36]:
x=random_labels(A, 5)
a,b=x[:2,:]

In [38]:
print(b)

[0 1 0 0 0]


In [46]:
Y_real = pd.read_csv('Y_10s_superclass.csv')
Y_real=np.array(Y_real)
Y_real=Y_real[:1000,:]
Y_unique = np.unique(Y_real, axis=0)

X_real = np.loadtxt('X_10s_1000.csv')
X_real = X_real.reshape(X_real.shape[0], 1000, 1)
data = tf.data.Dataset.from_tensor_slices((X_real,Y_real))

In [50]:
print(data[0])

TypeError: 'TensorSliceDataset' object is not subscriptable