In [1]:
import h5py
import numpy as np
import pandas as pd
import tensorflow as tf
import sys
import time
import sklearn

In [2]:
# CSV tables for the train/test splits plus the sample submission file.
pheno_train = pd.read_csv('pheno_train.csv')
pheno_test = pd.read_csv('pheno_test.csv')
ss = pd.read_csv('sample_submission.csv')
# HDF5 file opened read-only; the handle is left open because later cells
# pass `func` to the data-loading helpers.
func = h5py.File('abide.hdf5', 'r')

# adjust according to submission format
# (shifts every DX_GROUP value down by 1.0 -- presumably {1, 2} -> {0, 1};
# confirm against the data)
pheno_train['DX_GROUP'] = pheno_train['DX_GROUP'] - 1.0

In [3]:
def get_data_train(data, pheno, derivative):
    """Collect the feature matrix and labels for the training subjects.

    Parameters
    ----------
    data : mapping (e.g. an open ``h5py.File``)
        Must support ``data['patients'][file_id][derivative][()]``.
    pheno : pandas.DataFrame
        One row per subject; needs ``FILE_ID`` and ``DX_GROUP`` columns.
    derivative : str
        Name of the per-subject dataset to read (e.g. ``'aal'``).

    Returns
    -------
    X : numpy.ndarray (float32)
        Per-subject arrays stacked in ``pheno`` row order.
    y : numpy.ndarray (float32)
        ``DX_GROUP`` values in the same order.
    """
    X = []
    y = []
    total = pheno.shape[0]
    for done, (_, row) in enumerate(pheno.iterrows(), start=1):
        # `[()]` reads the whole dataset into memory.
        connectivity = data['patients'][row['FILE_ID']][derivative][()]
        X.append(connectivity)
        y.append(row['DX_GROUP'])
        # Report a real percentage of rows processed (the previous version
        # printed the raw fraction and never reached 100).
        sys.stdout.write("\r{:.2f}%>".format(100.0 * done / total))
        sys.stdout.flush()

    X = np.array(X).astype(np.float32)
    y = np.array(y).astype(np.float32)
    return X, y

def get_data_test(data, pheno, derivative):
    """Collect the feature matrix and subject ids for the test subjects.

    Parameters
    ----------
    data : mapping (e.g. an open ``h5py.File``)
        Must support ``data['patients'][file_id][derivative][()]``.
    pheno : pandas.DataFrame
        One row per subject; needs ``FILE_ID`` and ``SUB_ID`` columns.
    derivative : str
        Name of the per-subject dataset to read (e.g. ``'aal'``).

    Returns
    -------
    X_test : numpy.ndarray (float32)
        Per-subject arrays stacked in ``pheno`` row order.
    sub_ids : list
        ``SUB_ID`` values in the same order.
    """
    X_test = []
    sub_ids = []
    total = pheno.shape[0]
    for done, (_, row) in enumerate(pheno.iterrows(), start=1):
        # `[()]` reads the whole dataset into memory.
        connectivity = data['patients'][row['FILE_ID']][derivative][()]
        X_test.append(connectivity)
        sub_ids.append(row['SUB_ID'])
        # Report a real percentage of rows processed (the previous version
        # printed the raw fraction and never reached 100).
        sys.stdout.write("\r{:.2f}%>".format(100.0 * done / total))
        sys.stdout.flush()

    X_test = np.array(X_test).astype(np.float32)
    return X_test, sub_ids

In [4]:
# Read the 'aal' derivative for every training row, then display the shapes.
X, y = get_data_train(func, pheno_train, 'aal')
X.shape, y.shape

1.00%>

((931, 6670), (931,))

In [5]:
# Read the 'aal' derivative for every test row, then display the sizes.
X_test, sub_ids = get_data_test(func, pheno_test, 'aal')
X_test.shape, len(sub_ids)

0.99%>

((104, 6670), 104)

In [6]:
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier as hgbc
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
import gc

# Number of cross-validation folds; also the column count of every
# `final_preds` array allocated below.
N_SPLITS = 10
# No shuffle/random_state is passed, so KFold runs with its defaults.
kf = KFold(n_splits=N_SPLITS)

## Baseline

In [7]:
# Baseline: histogram gradient boosting scored with the shared K-fold split.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    clf = hgbc(max_depth=6, max_leaf_nodes=25, verbose=0, max_iter=30)
    clf.fit(X[tr_idx], y[tr_idx])
    # Column i keeps this fold's class-1 probability for every test row.
    final_preds[:, i] = clf.predict_proba(X_test)[:, 1]
    fold_preds = clf.predict_proba(X[val_idx])[:, 1]
    # Score once and reuse the value for both the log line and the summary.
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del clf
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6535326086956522
Fold: 1, score: 0.6932573599240266
Fold: 2, score: 0.6077705827937097
Fold: 3, score: 0.6808905380333952
Fold: 4, score: 0.7437258687258688
Fold: 5, score: 0.651031894934334
Fold: 6, score: 0.7078703703703703
Fold: 7, score: 0.7574074074074074
Fold: 8, score: 0.6875
Fold: 9, score: 0.6796296296296298
Mean (std): 0.6862616260514394(0.04174598358018723)


## Experiments

## CNN

In [20]:
from numpy import mean
from numpy import std
from numpy import dstack
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.metrics import *
import numpy as np
from tensorflow.keras.regularizers import l2

from sklearn.model_selection import train_test_split
import tensorflow as tf

In [11]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to a single GPU: the second one (index 1) when more
  # than one is present, otherwise the only one available. Indexing gpus[1]
  # unconditionally raised IndexError on single-GPU machines.
  gpu_index = 1 if len(gpus) > 1 else 0
  try:
    tf.config.experimental.set_visible_devices(gpus[gpu_index], 'GPU')
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
  except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

4 Physical GPUs, 1 Logical GPU


### Relu

In [22]:
def cnn_Relu():
    """Build and compile the two-conv-layer ReLU CNN.

    Input shape is (6670, 1); the output is a single sigmoid unit trained
    with binary cross-entropy and the Adam optimizer.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(6670, 1)),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='relu'),
        Dropout(0.25),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [23]:
# 10-fold CV of cnn_Relu on the raw (un-normalised) features.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
# Conv1D expects (samples, steps, channels): add a trailing channel axis.
X_test_3d = X_test.reshape(X_test.shape[0], -1, 1)
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_Relu()
    # No per-fold statistics are computed here: this variant feeds raw values,
    # and the old unused `mean`/`std` assignments shadowed the numpy imports.
    X_train = X[tr_idx].reshape(len(tr_idx), -1, 1)
    X_val = X[val_idx].reshape(len(val_idx), -1, 1)

    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_3d, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6304347826086956
Fold: 1, score: 0.7283950617283951
Fold: 2, score: 0.6799259944495838
Fold: 3, score: 0.6730055658627085
Fold: 4, score: 0.7166988416988417
Fold: 5, score: 0.6580675422138837
Fold: 6, score: 0.9046296296296297
Fold: 7, score: 0.7513888888888889
Fold: 8, score: 0.6828703703703703
Fold: 9, score: 0.6671296296296297
Mean (std): 0.7092546307080626(0.07337980191596208)


In [34]:
def cnn_Relu_v2():
    """Build and compile the three-conv-layer ReLU CNN.

    Each convolution is followed by Dropout(0.5). Input shape is (6670, 1);
    the output is a single sigmoid unit.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(6670, 1)),
        Dropout(0.5),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        Dropout(0.5),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='relu'),
        Dropout(0.25),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [36]:
# 10-fold CV of cnn_Relu_v2 on the raw (un-normalised) features.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 50, 10, False
# Conv1D expects (samples, steps, channels): add a trailing channel axis.
X_test_3d = X_test.reshape(X_test.shape[0], -1, 1)
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_Relu_v2()
    # No per-fold statistics are computed here: this variant feeds raw values,
    # and the old unused `mean`/`std` assignments shadowed the numpy imports.
    X_train = X[tr_idx].reshape(len(tr_idx), -1, 1)
    X_val = X[val_idx].reshape(len(val_idx), -1, 1)

    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_3d, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.5797101449275363
Fold: 1, score: 0.6937321937321937
Fold: 2, score: 0.6285846438482887
Fold: 3, score: 0.6804267161410018
Fold: 4, score: 0.7282818532818532
Fold: 5, score: 0.6796435272045028
Fold: 6, score: 0.7625
Fold: 7, score: 0.7474537037037037
Fold: 8, score: 0.6331018518518519
Fold: 9, score: 0.6893518518518518
Mean (std): 0.6822786486542783(0.053677590033873686)


In [37]:
def cnn_Relu_v3():
    """Build and compile the ReLU CNN with two dense hidden layers (100, 50).

    Input shape is (6670, 1); the output is a single sigmoid unit.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(6670, 1)),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='relu'),
        Dropout(0.25),
        Dense(50, activation='relu'),
        Dropout(0.25),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [38]:
# 10-fold CV of cnn_Relu_v3 on the raw (un-normalised) features.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
# Conv1D expects (samples, steps, channels): add a trailing channel axis.
X_test_3d = X_test.reshape(X_test.shape[0], -1, 1)
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_Relu_v3()
    # No per-fold statistics are computed here: this variant feeds raw values,
    # and the old unused `mean`/`std` assignments shadowed the numpy imports.
    X_train = X[tr_idx].reshape(len(tr_idx), -1, 1)
    X_val = X[val_idx].reshape(len(val_idx), -1, 1)

    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_3d, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.5892210144927537
Fold: 1, score: 0.7559354226020893
Fold: 2, score: 0.7354301572617946
Fold: 3, score: 0.6739332096474954
Fold: 4, score: 0.7104247104247104
Fold: 5, score: 0.6130393996247655
Fold: 6, score: 0.8078703703703703
Fold: 7, score: 0.6916666666666667
Fold: 8, score: 0.6224537037037037
Fold: 9, score: 0.6884259259259259
Mean (std): 0.6888400580720275(0.06450872406853164)


### Relu-Normalization

In [24]:
# 10-fold CV of cnn_Relu on per-feature standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_Relu()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalise a copy of the test set. Updating X_test in place re-scaled
    # already-scaled data on every fold after the first and in later cells.
    X_test_norm = (X_test - fold_mean) / fold_std

    # Conv1D expects (samples, steps, channels): add a trailing channel axis.
    model.fit(X_train.reshape(len(tr_idx), -1, 1), y[tr_idx],
              epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_norm.reshape(X_test.shape[0], -1, 1), verbose=0)[:, 0]
    fold_preds = model.predict(X_val.reshape(len(val_idx), -1, 1), verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6186594202898551
Fold: 1, score: 0.7274453941120609
Fold: 2, score: 0.7183163737280296
Fold: 3, score: 0.7124304267161411
Fold: 4, score: 0.7591698841698842
Fold: 5, score: 0.6604127579737336
Fold: 6, score: 0.8449074074074074
Fold: 7, score: 0.700462962962963
Fold: 8, score: 0.6453703703703703
Fold: 9, score: 0.6856481481481481
Mean (std): 0.7072823145878594(0.06048652050505688)


### Relu-Normalization-L2

In [25]:
def cnn_relu_l2():
    """Build and compile the ReLU CNN with L2(0.01) on every kernel and bias.

    Input shape is (6670, 1); the output is a single sigmoid unit.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(6670, 1),
               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        Conv1D(filters=64, kernel_size=3, activation='relu',
               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='relu',
              kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        Dropout(0.25),
        Dense(1, activation='sigmoid',
              kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [26]:
# 10-fold CV of cnn_relu_l2 on per-feature standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_relu_l2()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalise a copy of the test set. Updating X_test in place re-scaled
    # already-scaled data on every fold after the first and in later cells.
    X_test_norm = (X_test - fold_mean) / fold_std

    # Conv1D expects (samples, steps, channels): add a trailing channel axis.
    model.fit(X_train.reshape(len(tr_idx), -1, 1), y[tr_idx],
              epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_norm.reshape(X_test.shape[0], -1, 1), verbose=0)[:, 0]
    fold_preds = model.predict(X_val.reshape(len(val_idx), -1, 1), verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.5801630434782609
Fold: 1, score: 0.7378917378917378
Fold: 2, score: 0.6623496762257168
Fold: 3, score: 0.6600185528756957
Fold: 4, score: 0.790057915057915
Fold: 5, score: 0.7176360225140713
Fold: 6, score: 0.7666666666666667
Fold: 7, score: 0.7032407407407407
Fold: 8, score: 0.6773148148148148
Fold: 9, score: 0.65
Mean (std): 0.694533917026562(0.05865576198393549)


### LeakyRelu

In [44]:
# 10-fold CV of the LeakyReLU CNN on the raw (un-normalised) features.
# NOTE: cnn_LeakyRelu_Normalization is defined in a later cell (section
# "CNN-LeakyRelu-Normalization"); that cell must be executed before this one.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
# Conv1D expects (samples, steps, channels): add a trailing channel axis.
X_test_3d = X_test.reshape(X_test.shape[0], -1, 1)
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_LeakyRelu_Normalization()
    # No per-fold statistics are computed here: this variant feeds raw values,
    # and the old unused `mean`/`std` assignments shadowed the numpy imports.
    X_train = X[tr_idx].reshape(len(tr_idx), -1, 1)
    X_val = X[val_idx].reshape(len(val_idx), -1, 1)

    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_3d, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6521739130434782
Fold: 1, score: 0.7711301044634378
Fold: 2, score: 0.6981961147086032
Fold: 3, score: 0.6892393320964749
Fold: 4, score: 0.7799227799227799
Fold: 5, score: 0.6362570356472796
Fold: 6, score: 0.8358796296296296
Fold: 7, score: 0.7212962962962963
Fold: 8, score: 0.6206018518518518
Fold: 9, score: 0.6578703703703703
Mean (std): 0.7062567428030201(0.06672405704598806)


### CNN-LeakyRelu-Normalization-L2

In [30]:
def cnn_LeakyRelu_Normalization_L2():
    """Build and compile the LeakyReLU CNN with L2(0.01) on every kernel and bias.

    Activations are separate LeakyReLU layers after each linear layer. Input
    shape is (6670, 1); the output is a single sigmoid unit; only AUC is tracked.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, input_shape=(6670, 1),
               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        LeakyReLU(),
        Conv1D(filters=64, kernel_size=3,
               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        LeakyReLU(),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        LeakyReLU(),
        Dropout(0.25),
        Dense(1, activation='sigmoid',
              kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
    ]
    model = Sequential(stack)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[AUC()])
    return model

In [40]:
# 10-fold CV of cnn_LeakyRelu_Normalization_L2 on standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 30, 10, False
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_LeakyRelu_Normalization_L2()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalise a copy of the test set. Updating X_test in place re-scaled
    # already-scaled data on every fold after the first and in later cells.
    X_test_norm = (X_test - fold_mean) / fold_std

    # Conv1D expects (samples, steps, channels): add a trailing channel axis.
    model.fit(X_train.reshape(len(tr_idx), -1, 1), y[tr_idx],
              epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_norm.reshape(X_test.shape[0], -1, 1), verbose=0)[:, 0]
    fold_preds = model.predict(X_val.reshape(len(val_idx), -1, 1), verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.5339673913043478
Fold: 1, score: 0.6894586894586894
Fold: 2, score: 0.6655874190564293
Fold: 3, score: 0.6632653061224489
Fold: 4, score: 0.7842664092664093
Fold: 5, score: 0.7265478424015009
Fold: 6, score: 0.8027777777777778
Fold: 7, score: 0.7356481481481482
Fold: 8, score: 0.6657407407407409
Fold: 9, score: 0.6189814814814815
Mean (std): 0.6886241205757974(0.07502991299410879)


### CNN-LeakyRelu-Normalization

In [41]:
def cnn_LeakyRelu_Normalization():
    """Build and compile the un-regularised LeakyReLU CNN.

    Activations are separate LeakyReLU layers after each linear layer. Input
    shape is (6670, 1); the output is a single sigmoid unit; only AUC is tracked.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, input_shape=(6670, 1)),
        LeakyReLU(),
        Conv1D(filters=64, kernel_size=3),
        LeakyReLU(),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100),
        LeakyReLU(),
        Dropout(0.25),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[AUC()])
    return model

In [45]:
# 10-fold CV of cnn_LeakyRelu_Normalization on standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_LeakyRelu_Normalization()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalise a copy of the test set. Updating X_test in place re-scaled
    # already-scaled data on every fold after the first and in later cells.
    X_test_norm = (X_test - fold_mean) / fold_std

    # Conv1D expects (samples, steps, channels): add a trailing channel axis.
    model.fit(X_train.reshape(len(tr_idx), -1, 1), y[tr_idx],
              epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_norm.reshape(X_test.shape[0], -1, 1), verbose=0)[:, 0]
    fold_preds = model.predict(X_val.reshape(len(val_idx), -1, 1), verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6478713768115942
Fold: 1, score: 0.7207977207977208
Fold: 2, score: 0.6947271045328399
Fold: 3, score: 0.7147495361781077
Fold: 4, score: 0.7569980694980695
Fold: 5, score: 0.7030956848030019
Fold: 6, score: 0.8439814814814814
Fold: 7, score: 0.7157407407407408
Fold: 8, score: 0.6398148148148148
Fold: 9, score: 0.6243055555555556
Mean (std): 0.7062082085213927(0.06049675926413624)


### SeLU

In [27]:
def cnn_Selu():
    """Build and compile the two-conv-layer SELU CNN.

    Input shape is (6670, 1); the output is a single sigmoid unit trained
    with binary cross-entropy and the Adam optimizer.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='selu', input_shape=(6670, 1)),
        Conv1D(filters=64, kernel_size=3, activation='selu'),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='selu'),
        Dropout(0.25),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [28]:
# 10-fold CV of cnn_Selu on the raw (un-normalised) features.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
# Conv1D expects (samples, steps, channels): add a trailing channel axis.
X_test_3d = X_test.reshape(X_test.shape[0], -1, 1)
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_Selu()
    # No per-fold statistics are computed here: this variant feeds raw values,
    # and the old unused `mean`/`std` assignments shadowed the numpy imports.
    X_train = X[tr_idx].reshape(len(tr_idx), -1, 1)
    X_val = X[val_idx].reshape(len(val_idx), -1, 1)

    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_3d, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6759510869565218
Fold: 1, score: 0.6844729344729346
Fold: 2, score: 0.7583256244218316
Fold: 3, score: 0.6713821892393321
Fold: 4, score: 0.7567567567567569
Fold: 5, score: 0.6550187617260788
Fold: 6, score: 0.7972222222222223
Fold: 7, score: 0.7106481481481481
Fold: 8, score: 0.6243055555555556
Fold: 9, score: 0.6479166666666667
Mean (std): 0.6981999946166049(0.05316051604664803)


### Selu-Normalization

In [29]:
# 10-fold CV of cnn_Selu on per-feature standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_Selu()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalise a copy of the test set. Updating X_test in place re-scaled
    # already-scaled data on every fold after the first and in later cells.
    X_test_norm = (X_test - fold_mean) / fold_std

    # Conv1D expects (samples, steps, channels): add a trailing channel axis.
    model.fit(X_train.reshape(len(tr_idx), -1, 1), y[tr_idx],
              epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_norm.reshape(X_test.shape[0], -1, 1), verbose=0)[:, 0]
    fold_preds = model.predict(X_val.reshape(len(val_idx), -1, 1), verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6096014492753623
Fold: 1, score: 0.5788224121557455
Fold: 2, score: 0.635522664199815
Fold: 3, score: 0.6240723562152133
Fold: 4, score: 0.7236969111969113
Fold: 5, score: 0.6053001876172608
Fold: 6, score: 0.8222222222222222
Fold: 7, score: 0.7074074074074073
Fold: 8, score: 0.6131944444444445
Fold: 9, score: 0.6655092592592593
Mean (std): 0.6585349313993641(0.06988009414568881)


### Selu-Normalization-L2

In [30]:
def cnn_selu_l2():
    """Build and compile the SELU CNN with L2(0.01) on every kernel and bias.

    Input shape is (6670, 1); the output is a single sigmoid unit.
    """
    stack = [
        Conv1D(filters=64, kernel_size=3, activation='selu', input_shape=(6670, 1),
               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        Conv1D(filters=64, kernel_size=3, activation='selu',
               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='selu',
              kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        Dropout(0.25),
        Dense(1, activation='sigmoid',
              kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [31]:
# 10-fold CV of cnn_selu_l2 on per-feature standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 20, 10, False
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_selu_l2()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalise a copy of the test set. Updating X_test in place re-scaled
    # already-scaled data on every fold after the first and in later cells.
    X_test_norm = (X_test - fold_mean) / fold_std

    # Conv1D expects (samples, steps, channels): add a trailing channel axis.
    model.fit(X_train.reshape(len(tr_idx), -1, 1), y[tr_idx],
              epochs=epochs, batch_size=batch_size, verbose=verbose)
    # predict() replaces the deprecated Sequential.predict_proba(); for the
    # sigmoid output it returns the same probabilities.
    final_preds[:, i] = model.predict(X_test_norm.reshape(X_test.shape[0], -1, 1), verbose=0)[:, 0]
    fold_preds = model.predict(X_val.reshape(len(val_idx), -1, 1), verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.6277173913043479
Fold: 1, score: 0.6509971509971509
Fold: 2, score: 0.66049953746531
Fold: 3, score: 0.7077922077922079
Fold: 4, score: 0.7562741312741313
Fold: 5, score: 0.6186679174484052
Fold: 6, score: 0.7685185185185185
Fold: 7, score: 0.7388888888888889
Fold: 8, score: 0.6504629629629631


  


Fold: 9, score: 0.6560185185185186
Mean (std): 0.6835837225170442(0.05189337667037995)


## CNN-LSTM

In [7]:
from numpy import mean
from numpy import std
from numpy import dstack
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import LSTM
from tensorflow.keras.metrics import *
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.regularizers import l2

In [8]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to a single GPU: the second one (index 1) when more
  # than one is present, otherwise the only one available. Indexing gpus[1]
  # unconditionally raised IndexError on single-GPU machines.
  gpu_index = 1 if len(gpus) > 1 else 0
  try:
    tf.config.experimental.set_visible_devices(gpus[gpu_index], 'GPU')
    logical_gpus = tf.config.experimental.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
  except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

4 Physical GPUs, 1 Logical GPU


In [13]:
def cnn_lstm():
    """Build and compile the CNN-LSTM model.

    A TimeDistributed conv stack runs on each (667, 1) chunk of a
    variable-length sequence; the flattened chunk features feed an LSTM(100),
    then a dense head ending in a single sigmoid unit.
    """
    stack = [
        TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'),
                        input_shape=(None, 667, 1)),
        TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')),
        TimeDistributed(Dropout(0.5)),
        TimeDistributed(MaxPooling1D(pool_size=2)),
        TimeDistributed(Flatten()),
        LSTM(100),
        Dropout(0.5),
        Dense(100, activation='relu'),
        Dropout(0.25),
        Dense(1, activation='sigmoid'),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [15]:
# 10-fold CV of cnn_lstm on the raw (un-normalised) features.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 50, 128, 0
# The model consumes each 6670-long vector as 10 chunks of 667 values with a
# trailing channel axis.
X_test_seq = X_test.reshape((X_test.shape[0], 10, 667, 1))
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_lstm()
    # No per-fold statistics are computed here: this variant feeds raw values,
    # and the old unused `mean`/`std` assignments shadowed the numpy imports.
    X_train = X[tr_idx].reshape((len(tr_idx), 10, 667, 1))
    X_val = X[val_idx].reshape((len(val_idx), 10, 667, 1))
    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # Fill the test-prediction column; the earlier commented-out line used a
    # (n, 6670, 1) shape this model cannot accept, leaving final_preds all zeros.
    # predict() replaces the deprecated Sequential.predict_proba().
    final_preds[:, i] = model.predict(X_test_seq, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Instructions for updating:
Please use `model.predict()` instead.
Fold: 0, score: 0.6567028985507246
Fold: 1, score: 0.6828110161443495
Fold: 2, score: 0.6119333950046253
Fold: 3, score: 0.7430426716141002
Fold: 4, score: 0.6655405405405407
Fold: 5, score: 0.6918386491557224
Fold: 6, score: 0.7856481481481481
Fold: 7, score: 0.7124999999999999
Fold: 8, score: 0.6856481481481481
Fold: 9, score: 0.6476851851851851
Mean (std): 0.6883350652491544(0.047017575866168324)


### Normalization

In [16]:
# 10-fold CV of cnn_lstm on per-feature standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 50, 128, 0
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_lstm()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalised copy of the test set; X_test itself is left untouched.
    X_test_norm = (X_test - fold_mean) / fold_std
    # The model consumes each 6670-long vector as 10 chunks of 667 values with
    # a trailing channel axis.
    X_train = X_train.reshape((X_train.shape[0], 10, 667, 1))
    X_val = X_val.reshape((X_val.shape[0], 10, 667, 1))
    X_test_seq = X_test_norm.reshape((X_test.shape[0], 10, 667, 1))
    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # Fill the test-prediction column; the earlier commented-out line used a
    # (n, 6670, 1) shape this model cannot accept, leaving final_preds all zeros.
    # predict() replaces the deprecated Sequential.predict_proba().
    final_preds[:, i] = model.predict(X_test_seq, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.49501811594202894
Fold: 1, score: 0.6918328584995251
Fold: 2, score: 0.6253469010175763
Fold: 3, score: 0.549165120593692
Fold: 4, score: 0.7002895752895753
Fold: 5, score: 0.6163227016885552
Fold: 6, score: 0.6763888888888889
Fold: 7, score: 0.7467592592592593
Fold: 8, score: 0.6597222222222222
Fold: 9, score: 0.6143518518518519
Mean (std): 0.6375197495253175(0.07076333492459112)


### Normalization-L2

In [17]:
from tensorflow.keras.regularizers import l2

In [18]:
def cnn_lstm_l2():
    """Build and compile the CNN-LSTM model with L2(0.01) regularisation.

    Same layout as the un-regularised CNN-LSTM: TimeDistributed conv stack on
    (667, 1) chunks, LSTM(100), dense head with a single sigmoid unit. Every
    kernel, recurrent kernel and bias carries an L2(0.01) penalty.
    """
    stack = [
        TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu',
                               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
                        input_shape=(None, 667, 1)),
        TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu',
                               kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01))),
        TimeDistributed(Dropout(0.5)),
        TimeDistributed(MaxPooling1D(pool_size=2)),
        TimeDistributed(Flatten()),
        LSTM(100, kernel_regularizer=l2(0.01), recurrent_regularizer=l2(0.01),
             bias_regularizer=l2(0.01)),
        Dropout(0.5),
        Dense(100, activation='relu',
              kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
        Dropout(0.25),
        Dense(1, activation='sigmoid',
              kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
    ]
    model = Sequential(stack)
    tracked = ['accuracy', Precision(), Recall(), AUC()]
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=tracked)
    return model

In [19]:
# 10-fold CV of cnn_lstm_l2 on per-feature standardised inputs.
final_preds = np.zeros((X_test.shape[0], N_SPLITS))
scores = []
epochs, batch_size, verbose = 50, 128, 0
for i, (tr_idx, val_idx) in enumerate(kf.split(X)):
    model = cnn_lstm_l2()
    # Statistics come from the training fold only. Fold-local names avoid
    # shadowing the `mean`/`std` functions imported from numpy.
    fold_mean = X[tr_idx].mean(axis=0)
    fold_std = X[tr_idx].std(axis=0)
    fold_std[fold_std == 0] = 1.0  # constant feature: avoid dividing by zero
    X_train = (X[tr_idx] - fold_mean) / fold_std
    X_val = (X[val_idx] - fold_mean) / fold_std
    # Normalised copy of the test set; X_test itself is left untouched.
    X_test_norm = (X_test - fold_mean) / fold_std
    # The model consumes each 6670-long vector as 10 chunks of 667 values with
    # a trailing channel axis.
    X_train = X_train.reshape((X_train.shape[0], 10, 667, 1))
    X_val = X_val.reshape((X_val.shape[0], 10, 667, 1))
    X_test_seq = X_test_norm.reshape((X_test.shape[0], 10, 667, 1))
    model.fit(X_train, y[tr_idx], epochs=epochs, batch_size=batch_size, verbose=verbose)
    # Fill the test-prediction column; the earlier commented-out line used a
    # (n, 6670, 1) shape this model cannot accept, leaving final_preds all zeros.
    # predict() replaces the deprecated Sequential.predict_proba().
    final_preds[:, i] = model.predict(X_test_seq, verbose=0)[:, 0]
    fold_preds = model.predict(X_val, verbose=0)[:, 0]
    fold_auc = roc_auc_score(y[val_idx], fold_preds)
    print('Fold: {0}, score: {1}'.format(i, fold_auc))
    scores.append(fold_auc)
    # Release the fitted model before the next fold.
    del model
    gc.collect()
print('Mean (std): {0}({1})'.format(np.mean(scores), np.std(scores)))

Fold: 0, score: 0.5584239130434783
Fold: 1, score: 0.6595441595441596
Fold: 2, score: 0.601295097132285
Fold: 3, score: 0.6433209647495363
Fold: 4, score: 0.6766409266409267
Fold: 5, score: 0.6374296435272045
Fold: 6, score: 0.7032407407407407
Fold: 7, score: 0.7199074074074074
Fold: 8, score: 0.6203703703703703
Fold: 9, score: 0.5509259259259259
Mean (std): 0.6371099149082035(0.0533998753247405)
