# LIBRARIES

In [1]:
import os
from scipy.io import loadmat
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp

from sklearn.model_selection import train_test_split
from sklearn import preprocessing # classification
from itertools import chain 

# FEATURE ENGINEERING
from ecgdetectors import Detectors

# CLASSIFICATION
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score

from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import keras

Using TensorFlow backend.


In [68]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

# CONFIGURATION

In [2]:
# ENVIRONMENT
# tf2_gpu


# physionet data
pth_data = r'C:\Users\muham\Documents\rizwan-asus\DATA\PHYSIONET-2020\data1\Training_WFDB'

# pth_code = r'C:\Users\muham\Documents\rizwan-asus\PHYSIONET2020\code\physionet-python-2020-master'

pth_functions = r'C:\Users\muham\Documents\rizwan-asus\PHYSIONET2020\code\PhysioNet_2020'

pth_eval = r'C:\Users\muham\Documents\rizwan-asus\PHYSIONET2020\results'

pth_res = r'C:\Users\muham\Documents\rizwan-asus\PHYSIONET2020\results\res1'

pth_fig = r'C:\Users\muham\Documents\rizwan-asus\PHYSIONET2020\figures'

pth_pwd = os.getcwd()

# FUNCTION

In [3]:
# # GITHUB CODE
# os.chdir(pth_code)

# from driver import *
# from get_12ECG_features import *
# from run_12ECG_classifier import *

# LOCAL FUNCTIONS
os.chdir(pth_functions)

# PHYSIONET FUNCTIONS
from driver import *
from get_12ECG_features import *
from run_12ECG_classifier import *

# RIZ FUNCTIONS
from data_read import data_files_list
from data_read import data_files_load
from data_features import *

from data_preprocess import *
from data_prepare import *
from plt_ecg import *


# PHYSIONET SCORING 
from evaluate_12ECG_score import *

os.chdir(pth_pwd)

In [4]:
def r_peaks_idx2sample(r_peaks_idx,skip_direction = 'left',skip_values =2):
    """convert r-peaks indexes to peak-peak in terms of sample"""
    # skip_values = 2
    # skip_direction = 'both' # 'left', 'right', 'both'
    
    if(skip_direction == 'left'):
        r_idx_diff = np.diff(r_peaks_idx)[skip_values:]
    elif(skip_direction == 'right'):
        r_idx_diff = np.diff(r_peaks_idx)[:-skip_values]
    elif(skip_direction == 'both'):
        r_idx_diff = np.diff(r_peaks_idx)[skip_values:-skip_values]
    else: # default - 'left'
        r_idx_diff = np.diff(r_peaks_idx)[skip_values:]
        
    return r_idx_diff

In [5]:
def ecg_feature_extract(pth_data, list_fname, feat_type):
    "extract features based on the given specifications"
    features_matrix = []
    #for ii in range(len(list_data)):
    
    if(feat_type == 'raw-data'):
        print('FEATURE TYPE = raw-data')
        lead_no = 1
        NO_SAMPLES = 4500
        for ii in range(len(list_fname)):
            #-------------------------------------------------
            # META DATA FEATURES
            #-------------------------------------------------
            tmp_smp_name = list_fname[ii][:-4]

            print('ECG Sample Name:',tmp_smp_name)


            tmp_smp_mat = os.path.join(pth_data,tmp_smp_name+'.mat')
            tmp_smp_hea = os.path.join(pth_data,tmp_smp_name+'.hea')

            data, header_data = load_challenge_data(tmp_smp_mat)
            # data - ecg data
            # header_data - contains information such as fs, gain, etc.
            
            tmp_sample_ecg_all = data # ECG from all the leads
            tmp_sample_ecg_lead = data[lead_no-1]

            features_samp = np.zeros((0, NO_SAMPLES))

            
            if(len(tmp_sample_ecg_lead) > NO_SAMPLES):
                features_samp = tmp_sample_ecg_lead[0:NO_SAMPLES]
            else:
                features_samp[0,0:len(tmp_sample_ecg_lead)] = tmp_sample_ecg_lead
                    
            features_matrix.append(features_samp)

            del features_samp
            
        return np.asarray(features_matrix)
            
    
    else:
        lead_no = 1
        for ii in range(len(list_fname)):
            #-------------------------------------------------
            # META DATA FEATURES
            #-------------------------------------------------
            tmp_smp_name = list_fname[ii][:-4]

            print('ECG Sample Name:',tmp_smp_name)


            tmp_smp_mat = os.path.join(pth_data,tmp_smp_name+'.mat')
            tmp_smp_hea = os.path.join(pth_data,tmp_smp_name+'.hea')

            data, header_data = load_challenge_data(tmp_smp_mat)
            # data - ecg data
            # header_data - contains information such as fs, gain, etc. 

            tmp_hea = header_data[0].split(' ')
            # print(tmp_hea)
            # ['A0001', '12', '500', '7500', '16-Mar-2020', '19:07:01\n']
            ptID = tmp_hea[0] # 'A0001'
            num_leads = int(tmp_hea[1]) # '12'
            sample_Fs= int(tmp_hea[2]) # '500'
            gain_lead = np.zeros(num_leads) # 1000

            for ii in range(num_leads):
                tmp_hea = header_data[ii+1].split(' ')
                gain_lead[ii] = int(tmp_hea[2].split('/')[0])

            # for testing, we included the mean age of 57 if the age is a NaN
            # This value will change as more data is being released
            for iline in header_data:
                if iline.startswith('#Age'):
                    tmp_age = iline.split(': ')[1].strip()
                    tmp_sample_age = int(tmp_age if tmp_age != 'NaN' else 57)
                elif iline.startswith('#Sex'):
                    tmp_sex = iline.split(': ')[1]
                    if tmp_sex.strip()=='Female':
                        tmp_sample_sex =1
                    else:
                        tmp_sample_sex=0
                elif iline.startswith('#Dx'):
                    label = iline.split(': ')[1].split(',')[0]





            tmp_sample_ecg_all = data # ECG from all the leads
            tmp_sample_ecg_lead = data[lead_no-1]
            tmp_sample_ecg_g = gain_lead[lead_no-1]
            tmp_sample_ecg_fs = sample_Fs
            #------------------------------------------------------------
            # R-Peaks Features
            #------------------------------------------------------------
            r_peaks_pnet,r_idx_pnet = detect_peaks(tmp_sample_ecg_lead,tmp_sample_ecg_fs,tmp_sample_ecg_g)

            r_peaks_pnet = r_peaks_pnet.astype(int)
            r_idx_pnet_sample = r_peaks_idx2sample(r_idx_pnet)


            #------------------------------------------------------------
            # R-Peaks Statistical Features
            #------------------------------------------------------------
            #   mean
            mean_RR = np.mean(r_idx_pnet_sample/tmp_sample_ecg_fs)
            mean_Peaks = np.mean(r_peaks_pnet*tmp_sample_ecg_g)

            #   median
            median_RR = np.median(r_idx_pnet_sample/tmp_sample_ecg_fs)
            median_Peaks = np.median(r_peaks_pnet*tmp_sample_ecg_g)

            #   standard deviation
            std_RR = np.std(r_idx_pnet_sample/tmp_sample_ecg_fs*1000)
            std_Peaks = np.std(r_peaks_pnet*tmp_sample_ecg_g)

            #   variance
            var_RR = stats.tvar(r_idx_pnet_sample/tmp_sample_ecg_fs*1000)
            var_Peaks = stats.tvar(r_peaks_pnet*tmp_sample_ecg_g)

            #   Skewness
            skew_RR = stats.skew(r_idx_pnet_sample/tmp_sample_ecg_fs*1000)
            skew_Peaks = stats.skew(r_peaks_pnet*tmp_sample_ecg_g)

            #   Kurtosis
            kurt_RR = stats.kurtosis(r_idx_pnet_sample/tmp_sample_ecg_fs*1000)
            kurt_Peaks = stats.kurtosis(r_peaks_pnet*tmp_sample_ecg_g)

            features_samp = np.hstack([tmp_sample_age,tmp_sample_sex,mean_RR,mean_Peaks,median_RR,median_Peaks,std_RR,std_Peaks,var_RR,var_Peaks,skew_RR,skew_Peaks,kurt_RR,kurt_Peaks])

            features_matrix.append(features_samp)

            del features_samp

        return np.asarray(features_matrix)


# PARAMETERS

In [6]:
sample_no = 1 # index of the data sample
lead_no = 1 # 12-lead ECG waveform (1,2,3,... 12)

TOT_LEADS = 12
OUTPUT_CLASSES = 9

ANOMALIES_REMOVAL = False
NOISE_REMOVAL = False

# LOAD DATA 

##### List of data files ```data_read.py```

In [7]:
input_files = data_files_list(pth_data)
print('Total number of input files: ',len(input_files))

print(input_files[sample_no-1])

Total number of input files:  6877
A0001.mat


#### List of data and labels  ```data_read.py```

In [8]:
[list_data,list_label,list_fname,_] = data_files_load(pth_data,'',False,True)

# To get only 'First Label'
list_label = [item[0] for item in list_label]

Labels from REFERENCE file


In [9]:
print('Total Samples: ',len(list_label))
label_tmp = np.array(list_label) 

print('Unique labels',len(np.unique(label_tmp))) 
del label_tmp

Total Samples:  6877
Unique labels 9


# DATA SPLIT 
1. Training Data: **```X_train``` & ```Y_train```**
2. Validation Data: **```X_valid``` & ```Y_valid```**
3. Training Data: **```X_test``` & ```Y_test```**

In [10]:
# Split data into train and test subsets

# Train data (60%) +  Validation data (20%) + Test data (20%)
fname_train, fname_test, Y_train, Y_test = train_test_split(list_fname, list_label, test_size=0.2, shuffle=True,random_state=1)
fname_train, fname_valid, Y_train, Y_valid = train_test_split(fname_train, Y_train, test_size=0.25, shuffle=True,random_state=1)

# X_train - list of dimension samples x leads(12) x ecg signal
# Y_train - list of dimension samples x 1

In [11]:
print(len(fname_train),len(Y_train),len(fname_valid),len(Y_valid),len(fname_test),len(Y_test))

4125 4125 1376 1376 1376 1376


# DATA PREPARATION

In [94]:
[list_data_train,list_label_train,list_fname_train,list_meta_train] = data_files_load(pth_data,fname_train,False,True,False)
[list_data_valid,list_label_valid,list_fname_valid,list_meta_valid] = data_files_load(pth_data,fname_valid,False,True,False)
[list_data_test,list_label_test,list_fname_test,list_meta_test] = data_files_load(pth_data,fname_test,False,True,False)

Labels from REFERENCE file
Labels from REFERENCE file
Labels from REFERENCE file


In [95]:
print(len(list_data_train))
print(list_meta_train[0])

4125
['A4330 12 500 5226 16-Mar-2020 19:07:01\n', 'A4330.mat 16+24 1000/mV 16 0 1 10 0 I\n', 'A4330.mat 16+24 1000/mV 16 0 -16 3 0 II\n', 'A4330.mat 16+24 1000/mV 16 0 -16 21 0 III\n', 'A4330.mat 16+24 1000/mV 16 0 7 -39 0 aVR\n', 'A4330.mat 16+24 1000/mV 16 0 9 9 0 aVL\n', 'A4330.mat 16+24 1000/mV 16 0 -17 34 0 aVF\n', 'A4330.mat 16+24 1000/mV 16 0 1871 115 0 V1\n', 'A4330.mat 16+24 1000/mV 16 0 99 8 0 V2\n', 'A4330.mat 16+24 1000/mV 16 0 55 18 0 V3\n', 'A4330.mat 16+24 1000/mV 16 0 -46 2 0 V4\n', 'A4330.mat 16+24 1000/mV 16 0 -53 8 0 V5\n', 'A4330.mat 16+24 1000/mV 16 0 -20 -5 0 V6\n', '#Age: 58\n', '#Sex: Male\n', '#Dx: STD\n', '#Rx: Unknown\n', '#Hx: Unknown\n', '#Sx: Unknown\n']


In [96]:
# To get only 'First Label'
Y_train = [item[0] for item in list_label_train]
Y_valid = [item[0] for item in list_label_valid]

# # Data labels into matrix form i.e. [no of samples x no of output classes]
# lb = preprocessing.LabelBinarizer()
# lb.fit(Y_train)
# # lb.classes_
# Y_train = lb.transform(Y_train)
# Y_valid = lb.transform(Y_valid)

In [97]:
# To get only 'First Label'
Y_test = [item[0] for item in list_label_test]

# # Data labels into matrix form i.e. [no of samples x no of output classes]
# lb = preprocessing.LabelBinarizer()
# lb.fit(Y_test)
# # lb.classes_
# Y_test = lb.transform(Y_test)

# PRE-PROCESSING
- list --> list

In [98]:
print(Y_train)

[8, 4, 3, 2, 3, 7, 5, 1, 6, 6, 1, 8, 2, 5, 5, 5, 2, 2, 9, 6, 3, 1, 7, 5, 2, 2, 6, 7, 1, 3, 5, 5, 7, 5, 3, 5, 3, 7, 8, 6, 1, 2, 5, 1, 1, 4, 3, 5, 1, 5, 5, 5, 7, 2, 5, 1, 3, 7, 8, 7, 8, 1, 7, 2, 3, 2, 2, 5, 3, 5, 5, 3, 3, 2, 1, 5, 3, 7, 5, 2, 5, 5, 7, 5, 7, 4, 7, 2, 5, 5, 3, 8, 6, 1, 1, 6, 6, 2, 3, 7, 7, 1, 5, 1, 5, 1, 5, 1, 5, 5, 5, 7, 6, 8, 8, 4, 9, 2, 2, 8, 4, 8, 2, 2, 8, 5, 6, 6, 2, 5, 5, 3, 7, 5, 1, 3, 8, 2, 3, 1, 8, 1, 2, 1, 6, 7, 1, 3, 1, 3, 8, 9, 1, 8, 3, 6, 7, 2, 6, 8, 5, 8, 1, 8, 2, 9, 8, 2, 5, 2, 5, 1, 5, 7, 5, 2, 2, 6, 2, 5, 3, 5, 9, 2, 3, 3, 7, 7, 3, 6, 2, 2, 7, 7, 2, 5, 1, 8, 2, 6, 5, 8, 8, 2, 3, 5, 1, 5, 1, 7, 7, 6, 3, 2, 2, 1, 8, 7, 8, 3, 5, 2, 3, 2, 5, 5, 3, 3, 7, 2, 2, 5, 8, 5, 9, 8, 8, 9, 2, 2, 7, 1, 7, 5, 7, 2, 2, 2, 5, 1, 5, 5, 5, 2, 1, 1, 5, 2, 5, 6, 7, 7, 6, 8, 8, 5, 5, 5, 7, 8, 8, 1, 2, 8, 8, 3, 3, 8, 4, 2, 5, 6, 1, 5, 8, 1, 7, 1, 4, 5, 4, 2, 5, 8, 5, 5, 5, 1, 7, 3, 5, 5, 2, 8, 1, 1, 5, 4, 6, 7, 9, 2, 6, 6, 8, 2, 1, 6, 1, 2, 1, 5, 9, 2, 2, 5, 8, 8, 8, 2, 7, 2, 9, 

#### Type I
- removal of anomalies
- removal of noise

# FEATURE ENGINEERING

In [32]:

X_train = []
for ii in range(len(list_data_train)):
# for ii in range(2):
    #features = get_12ECG_features_r1(data, header_data)
    features = get_12ECG_features_r1(list_data_train[ii], list_meta_train[ii])
    
    X_train.append(features)
    
X_train = np.array(X_train) 

In [33]:
X_valid = []
for ii in range(len(list_data_valid)):
    features = get_12ECG_features_r1(list_data_valid[ii], list_meta_valid[ii])
    
    X_valid.append(features)
    
X_valid = np.array(X_valid) 

In [49]:
X_test = []
for ii in range(len(list_data_test)):
    features = get_12ECG_features_r1(list_data_test[ii], list_meta_test[ii])
    
    X_test.append(features)
    
X_test = np.array(X_test) 

In [34]:
print('Training Features Shape:', X_train.shape)
print('Training Labels Shape:', Y_train.shape)
print('Testing Features Shape:', X_valid.shape)
print('Testing Labels Shape:', Y_valid.shape)

Training Features Shape: (4125, 14)
Training Labels Shape: (4125, 9)
Testing Features Shape: (1376, 14)
Testing Labels Shape: (1376, 9)


# CLASSIFICATION

### Random Forest

In [99]:
clf_rf = RandomForestClassifier(n_jobs=2, random_state=0)

clf_rf.fit(X_train,Y_train)

RandomForestClassifier(n_jobs=2, random_state=0)

### Random Forest + Grid Search

In [106]:
param_grid = { 
    'n_estimators': [10, 25, 50, 100, 200, 300, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth' : [2, 4,5,6,7,8],
    'criterion' :['gini', 'entropy']
}

CV_rf = GridSearchCV(estimator=clf_rf, param_grid=param_grid, cv= 5)
CV_rf.fit(X_train, Y_train)

CV_rf.best_params_

{'criterion': 'gini',
 'max_depth': 7,
 'max_features': 'auto',
 'n_estimators': 200}

In [107]:
clf_rf1 = RandomForestClassifier(random_state=42, 
                                 max_features=CV_rf.best_params_['max_features'], 
                                 n_estimators= CV_rf.best_params_['n_estimators'], 
                                 max_depth=CV_rf.best_params_['max_depth'], 
                                 criterion=CV_rf.best_params_['criterion'])

In [108]:
clf_rf1.fit(X_train, Y_train)

RandomForestClassifier(max_depth=7, n_estimators=200, random_state=42)

### DNN Classifier

In [65]:
classifier = Sequential()

classifier.add(Dense(100, activation='relu', kernel_initializer='random_normal', input_dim=X_train.shape[1]))
classifier.add(Dense(100, activation='relu', kernel_initializer='random_normal'))
classifier.add(Dense(100, activation='relu', kernel_initializer='random_normal'))

classifier.add(Dense(Y_train.shape[1], activation='sigmoid', kernel_initializer='random_normal'))

In [66]:
classifier.compile(optimizer ='adam',loss='binary_crossentropy', metrics =['accuracy'])

In [67]:
#Fitting the data to the training dataset
classifier.fit(X_train,Y_train, batch_size=10, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100


Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.callbacks.History at 0x1a33a6c7348>

# PREDICTION

In [109]:
Y_pred = clf_rf1.predict(X_test)
Y_prob = clf_rf1.predict_proba(X_test)

In [110]:
print(Y_pred)

[8 5 1 ... 5 5 5]


In [111]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

print(confusion_matrix(Y_test,Y_pred))
print(classification_report(Y_test,Y_pred))
print(accuracy_score(Y_test, Y_pred))

[[107   8   1   0  61   1   1  10   0]
 [  1 104   4   0  80   5   1   9   0]
 [ 12  24   6   0 109   2   2   3   0]
 [  1  22   0   0  17   0   0   4   0]
 [ 31  54   5   0 203   7   7  22   0]
 [  7  34   5   0  46  12   8  12   0]
 [ 10  28   3   0  52   1  24   6   0]
 [ 23  20   2   0  61   2   4  52   0]
 [ 13   3   0   0  22   0   2   0   0]]
              precision    recall  f1-score   support

           1       0.52      0.57      0.54       189
           2       0.35      0.51      0.42       204
           3       0.23      0.04      0.07       158
           4       0.00      0.00      0.00        44
           5       0.31      0.62      0.41       329
           6       0.40      0.10      0.16       124
           7       0.49      0.19      0.28       124
           8       0.44      0.32      0.37       164
           9       0.00      0.00      0.00        40

    accuracy                           0.37      1376
   macro avg       0.31      0.26      0.25      137

  _warn_prf(average, modifier, msg_start, len(result))


### DNN Classifier

In [86]:
Y_pred=classifier.predict(X_test)
# Y_pred =(Y_pred>0.5)

In [90]:
print(Y_pred)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [88]:


accuracy=accuracy_score(Y_test, Y_pred)
print('Accuracy: ',accuracy)

# roc_value = roc_auc_score(Y_test,Y_prob)
# print('ROC: ',roc_value)

ValueError: Classification metrics can't handle a mix of multilabel-indicator and continuous-multioutput targets

In [73]:
# cm = confusion_matrix(Y_test, Y_pred)
# print(cm)

from sklearn import metrics

print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

### LSTM Model

In [None]:
feat_dim = X_train.shape[2]

In [None]:
# create and fit the LSTM network
batch_size = 64
model = Sequential()
model.add(LSTM(512, return_sequences=True, input_shape=(1, feat_dim)))
#model.add(Dropout(0.25))
model.add(LSTM(256, return_sequences=True))
#model.add(Dropout(0.25))
model.add(LSTM(128, return_sequences=True))
#model.add(Dropout(0.25))
model.add(LSTM(64, return_sequences=True))
#model.add(Dropout(0.25))
model.add(LSTM(32))
model.add(Dense(OUTPUT_CLASSES, activation='softmax'))
early_stopping = keras.callbacks.EarlyStopping(monitor='val_acc', min_delta=0, patience=50, verbose=1, mode='auto')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, Y_train, epochs=100, batch_size=batch_size, validation_data=(X_valid, Y_valid), verbose=2, shuffle=False, callbacks=[early_stopping])


In [None]:
model.save('ecg_lstm.h5')

In [None]:
pred_prob = model.predict(X_test)
pred_classes = model.predict_classes(X_test)

In [None]:
score = accuracy_score(Y_test, lb.transform(pred_classes))
print(score)

### LSTM Model

In [None]:
from keras.layers import (Input, Conv1D, MaxPooling1D, Dropout,
                          BatchNormalization, Activation, Add,
                          Flatten, Dense)
from keras.models import Model

In [None]:
class ResidualUnit(object):
    """Residual unit block (unidimensional).
    Parameters
    ----------
    n_samples_out: int
        Number of output samples.
    n_filters_out: int
        Number of output filters.
    kernel_initializer: str, otional
        Initializer for the weights matrices. See Keras initializers. By default it uses
        'he_normal'.
    dropout_rate: float [0, 1), optional
        Dropout rate used in all Dropout layers. Default is 0.8
    kernel_size: int, optional
        Kernel size for convolutional layers. Default is 17.
    preactivation: bool, optional
        When preactivation is true use full preactivation architecture proposed
        in [1]. Otherwise, use architecture proposed in the original ResNet
        paper [2]. By default it is true.
    postactivation_bn: bool, optional
        Defines if you use batch normalization before or after the activation layer (there
        seems to be some advantages in some cases:
        https://github.com/ducha-aiki/caffenet-benchmark/blob/master/batchnorm.md).
        If true, the batch normalization is used before the activation
        function, otherwise the activation comes first, as it is usually done.
        By default it is false.
    activation_function: string, optional
        Keras activation function to be used. By default 'relu'.
    References
    ----------
    .. [1] K. He, X. Zhang, S. Ren, and J. Sun, "Identity Mappings in Deep Residual Networks,"
           arXiv:1603.05027 [cs], Mar. 2016. https://arxiv.org/pdf/1603.05027.pdf.
    .. [2] K. He, X. Zhang, S. Ren, and J. Sun, "Deep Residual Learning for Image Recognition," in 2016 IEEE Conference
           on Computer Vision and Pattern Recognition (CVPR), 2016, pp. 770-778. https://arxiv.org/pdf/1512.03385.pdf
    """

    def __init__(self, n_samples_out, n_filters_out, kernel_initializer='he_normal',
                 dropout_rate=0.8, kernel_size=17, preactivation=True,
                 postactivation_bn=False, activation_function='relu'):
        self.n_samples_out = n_samples_out
        self.n_filters_out = n_filters_out
        self.kernel_initializer = kernel_initializer
        self.dropout_rate = dropout_rate
        self.kernel_size = kernel_size
        self.preactivation = preactivation
        self.postactivation_bn = postactivation_bn
        self.activation_function = activation_function

    def _skip_connection(self, y, downsample, n_filters_in):
        """Implement skip connection."""
        # Deal with downsampling
        if downsample > 1:
            y = MaxPooling1D(downsample, strides=downsample, padding='same')(y)
        elif downsample == 1:
            y = y
        else:
            raise ValueError("Number of samples should always decrease.")
        # Deal with n_filters dimension increase
        if n_filters_in != self.n_filters_out:
            # This is one of the two alternatives presented in ResNet paper
            # Other option is to just fill the matrix with zeros.
            y = Conv1D(self.n_filters_out, 1, padding='same',
                       use_bias=False, kernel_initializer=self.kernel_initializer)(y)
        return y

    def _batch_norm_plus_activation(self, x):
        if self.postactivation_bn:
            x = Activation(self.activation_function)(x)
            x = BatchNormalization(center=False, scale=False)(x)
        else:
            x = BatchNormalization()(x)
            x = Activation(self.activation_function)(x)
        return x

    def __call__(self, inputs):
        """Residual unit."""
        x, y = inputs
        n_samples_in = y.shape[1].value
        downsample = n_samples_in // self.n_samples_out
        n_filters_in = y.shape[2].value
        y = self._skip_connection(y, downsample, n_filters_in)
        # 1st layer
        x = Conv1D(self.n_filters_out, self.kernel_size, padding='same',
                   use_bias=False, kernel_initializer=self.kernel_initializer)(x)
        x = self._batch_norm_plus_activation(x)
        if self.dropout_rate > 0:
            x = Dropout(self.dropout_rate)(x)

        # 2nd layer
        x = Conv1D(self.n_filters_out, self.kernel_size, strides=downsample,
                   padding='same', use_bias=False,
                   kernel_initializer=self.kernel_initializer)(x)
        if self.preactivation:
            x = Add()([x, y])  # Sum skip connection and main connection
            y = x
            x = self._batch_norm_plus_activation(x)
            if self.dropout_rate > 0:
                x = Dropout(self.dropout_rate)(x)
        else:
            x = BatchNormalization()(x)
            x = Add()([x, y])  # Sum skip connection and main connection
            x = Activation(self.activation_function)(x)
            if self.dropout_rate > 0:
                x = Dropout(self.dropout_rate)(x)
            y = x
        return [x, y]

In [None]:
# ----- Model ----- #
kernel_size = 16
kernel_initializer = 'he_normal'
signal = Input(shape=(4096, 12), dtype=np.float32, name='signal')
age_range = Input(shape=(6,), dtype=np.float32, name='age_range')
is_male = Input(shape=(1,), dtype=np.float32, name='is_male')


x = signal
x = Conv1D(64, kernel_size, padding='same', use_bias=False,
           kernel_initializer=kernel_initializer)(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x, y = ResidualUnit(1024, 128, kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, x])
x, y = ResidualUnit(256, 196, kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, y])
x, y = ResidualUnit(64, 256, kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, y])
x, _ = ResidualUnit(16, 320, kernel_size=kernel_size,
                    kernel_initializer=kernel_initializer)([x, y])
x = Flatten()(x)
diagn = Dense(6, activation='sigmoid', kernel_initializer=kernel_initializer)(x)
model = Model(signal, diagn)

In [None]:
history = model.fit(x, y,
            batch_size=batch_size,
            epochs=70,
            initial_epoch=0,  # If you are continuing a interrupted section change here
            validation_split=args.val_split,
            shuffle='batch',  # Because our dataset is an HDF5 file
            callbacks=callbacks,
            verbose=1)

### MISC

# JUNK