In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Dense,Input,Dropout,GaussianNoise
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import StratifiedKFold,RepeatedStratifiedKFold,train_test_split,KFold
from sklearn.decomposition import PCA
from pandas import DataFrame
from time import time
import numpy as np
import h5py
from docopt import docopt
from utils import  load_fold

In [None]:
# Load one fold of the experiment from the HDF5 file and pool its
# train/valid/test partitions back into a single (X, Y) dataset, which is then
# re-split below into an 85/15 train/hold-out split.
X,Y=[],[]
experiments=['cc200_whole']
with h5py.File("data/sample.hdf5", "r") as hdf5:
    for experiment in experiments:
        # Look the group up from the loop variable instead of a hard-coded
        # name, so adding entries to `experiments` actually has an effect.
        exp_storage=hdf5["experiments/{}".format(experiment)]
        for fold in exp_storage:
            fold_x_train, fold_y_train, \
            fold_x_valid, fold_y_valid, \
            fold_x_test, fold_y_test = load_fold(hdf5["patients"], exp_storage, fold)
            # Pool the three partitions in train -> valid -> test order.
            X=np.concatenate([fold_x_train,fold_x_valid,fold_x_test])
            # Labels are kept as a column vector of shape (n_samples, 1).
            Y=np.concatenate([fold_y_train,fold_y_valid,fold_y_test]).reshape(-1,1)
            # Only the first fold is read: its partitions are pooled anyway,
            # so the remaining folds are not needed here.
            break

X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.15,shuffle=True,random_state=1)

# Class balance of each split: p counts labels equal to 1, n counts the rest.
p=int(np.count_nonzero(Y_train[:,0]==1))
n=len(Y_train)-p
print('Training Examples: p-{} n-{}'.format(p,n))
p=int(np.count_nonzero(Y_test[:,0]==1))
n=len(Y_test)-p
print('Testing Examples:  p-{} n-{}'.format(p,n))

In [None]:
def train_and_evaluate_model(x_train,y_train,x_test,y_test,i):
    """Pretrain two stacked denoising autoencoders, then train a classifier on top.

    Steps:
      1. ae1 (input -> 1000 -> input) is trained to reconstruct ``x_train``.
      2. ae2 (1000 -> 600 -> 1000) is trained to reconstruct ae1's encodings.
      3. An MLP (input -> 1000 -> 600 -> 1) is built whose first two layers are
         initialised from the saved encoders and frozen; only the sigmoid
         output layer is trained on ``(x_train, y_train)``.

    Every model is saved twice under ``SavedModels_In_h5format/Exp1/fold_<i>/``
    and ``SavedModels_In_SavedModels/Exp1/fold_<i>/``; TensorBoard logs for the
    MLP go to ``./logs/Exp1/fold_<i>``.

    Args:
        x_train: 2-D array of training features (samples x features).
        y_train: binary training labels for the MLP.
        x_test: 2-D array of test features.
        y_test: binary test labels for the MLP.
        i: fold identifier, used only to build log/save paths.

    Returns:
        dict with keys ``ae1_loss, ae1_mse, ae1_mae, ae2_loss, ae2_mse,
        ae2_mae, mlp_loss, mlp_accuracy`` measured on the test data.
    """
    import os  # local import: only needed to create the output directories

    h5_dir='SavedModels_In_h5format/Exp1/fold_{}'.format(i)
    saved_model_dir='SavedModels_In_SavedModels/Exp1/fold_{}'.format(i)
    # Saving an HDF5 file fails if the parent directory is missing.
    os.makedirs(h5_dir,exist_ok=True)
    os.makedirs(saved_model_dir,exist_ok=True)

    #callbacks
    tensorboard=TensorBoard(log_dir='./logs/Exp1/fold_{}'.format(i))
    Results={}

    #Autoencoder-1 architecture
    print("Constructing ae1 architecture...",end="")
    input_size=x_train.shape[1]
    ae1_input_layer=Input(shape=(input_size,))
    # The noise layer must actually feed the encoder, otherwise the
    # autoencoder is not denoising. It is only active during training.
    ae1_input_layer_noise=GaussianNoise(0.1)(ae1_input_layer)
    ae1_encoder_dense=Dense(1000,activation='relu')
    ae1_bottle_neck=ae1_encoder_dense(ae1_input_layer_noise)
    ae1_ouput_layer=Dense(input_size,activation='relu')(ae1_bottle_neck)
    ae1=Model(ae1_input_layer,ae1_ouput_layer)
    print("done!")
    print("Configuring ae1...",end="")
    ae1.compile(optimizer='adam',loss='mse',metrics=['mse','mae'])
    print("done!")

    #Training ae1
    print("Training ae1...")
    ae1.fit(x_train,x_train,epochs=50,batch_size=16,validation_split=0.1,shuffle=True)
    print("done!")
    print("Evaluating and storing results of ae1...",end="")
    temp=ae1.evaluate(x_test,x_test,verbose=2)
    Results['ae1_loss'],Results['ae1_mse'],Results['ae1_mae']=temp[0],temp[1],temp[2]
    print("done!")

    #Saving ae1
    print("Saving ae1...",end="")
    ae1.save('{}/ae1.hdf5'.format(h5_dir))
    ae1.save('{}/ae1'.format(saved_model_dir))
    print("done!")

    #Preparing data for autoencoder-2
    print("Preparing data for ae2 by loading ae1...",end="")
    ae1_new=tf.keras.models.load_model('{}/ae1'.format(saved_model_dir))
    # GaussianNoise has no weights, so the first two entries are the encoder
    # Dense layer's kernel and bias.
    ae1_encoder_weights=ae1_new.get_weights()[:2]
    ae1_encoder=Model(ae1_input_layer,ae1_bottle_neck)
    # Address the Dense layer by reference: with the noise layer in the graph,
    # positional indexing into ae1_encoder.layers would hit GaussianNoise.
    ae1_encoder_dense.set_weights(ae1_encoder_weights)
    ae2_X_train=ae1_encoder.predict(x_train)
    ae2_X_test=ae1_encoder.predict(x_test)
    print("done!")

    #Autoencoder-2 architecture
    print("Constructing ae2 architecture...",end="")
    encoded_size=ae2_X_train.shape[1]
    ae2_input_layer=Input(shape=(encoded_size,))
    ae2_input_layer_noise=GaussianNoise(0.2)(ae2_input_layer)
    ae2_bottle_neck=Dense(600,activation='relu')(ae2_input_layer_noise)
    ae2_ouput_layer=Dense(encoded_size,activation='relu')(ae2_bottle_neck)
    ae2=Model(ae2_input_layer,ae2_ouput_layer)
    print("done!")
    print("Configuring ae2...",end="")
    ae2.compile(optimizer='adam',loss='mse',metrics=['mse','mae'])
    print("done!")

    #Training ae2
    print("Training ae2...",end="")
    ae2.fit(ae2_X_train,ae2_X_train,epochs=100,batch_size=16,validation_split=0.1,shuffle=True)
    print("done!")
    print("Evaluating and storing results of ae2...",end="")
    temp=ae2.evaluate(ae2_X_test,ae2_X_test,verbose=2)
    Results['ae2_loss'],Results['ae2_mse'],Results['ae2_mae']=temp[0],temp[1],temp[2]
    print("done!")

    #Saving ae2
    print("Saving ae2...",end="")
    ae2.save('{}/ae2.hdf5'.format(h5_dir))
    ae2.save('{}/ae2'.format(saved_model_dir))
    print("done!")

    #Loading Models for Transfer Learning
    print("Loading ae2")
    ae2_new=tf.keras.models.load_model('{}/ae2'.format(saved_model_dir))
    ae2_encoder_weights=ae2_new.get_weights()[:2]
    print("done!")

    #MLP
    print("Constructing architecture of mlp...",end="")
    mlp=Sequential()
    # Input width follows the data instead of a hard-coded 19900.
    mlp.add(Dense(1000,activation='relu',input_dim=input_size))
    mlp.add(Dense(600,activation='relu'))
    mlp.add(Dense(1,activation='sigmoid'))
    print("done!")
    print("Freezing layers of mlp...",end="")
    mlp.layers[0].set_weights(ae1_encoder_weights)
    mlp.layers[0].trainable=False
    mlp.layers[1].set_weights(ae2_encoder_weights)
    mlp.layers[1].trainable=False
    print("done!")
    # Compile AFTER freezing: Keras only honours `trainable` values that were
    # in place when compile() was called.
    print("Configuring mlp...",end="")
    mlp.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
    print("done!")

    print("Training mlp...",end="")
    # Use this fold's data (the function arguments), not the notebook-level
    # X_train/Y_train split, so the classifier never sees the fold's test rows.
    mlp.fit(x_train,y_train,epochs=100,batch_size=16,validation_split=0.1,shuffle=True, callbacks=[tensorboard])
    print("done!")

    print("Saving mlp...",end="")
    mlp.save('{}/mlp.hdf5'.format(h5_dir))
    mlp.save('{}/mlp'.format(saved_model_dir))
    print("done!")

    print("Evaluating and storing results of mlp...",end="")
    temp=mlp.evaluate(x_test,y_test,verbose=2)
    Results['mlp_loss'],Results['mlp_accuracy']=temp[0],temp[1]
    print("done!")
    return Results

In [None]:
# Stratified k-fold cross-validation over the pooled dataset (X, Y).
n_folds=10
# The fold count comes from n_folds, and the shuffle is seeded so the fold
# assignment is reproducible across kernel restarts.
skf = StratifiedKFold(n_folds, shuffle=True, random_state=1)
# One result dict per fold, kept so later cells can aggregate the metrics.
fold_results=[]
for fold_number, (train, test) in enumerate(skf.split(X, Y), start=1):
    print("Running Fold", fold_number, "/", n_folds)
    # `train` / `test` are integer index arrays into X and Y.
    x_train,y_train=X[train],Y[train]
    x_test,y_test=X[test],Y[test]
    fold_result=train_and_evaluate_model(x_train,y_train,x_test,y_test,fold_number)
    fold_results.append(fold_result)
    print(fold_result)

In [None]:
# Recorded per-fold metrics, one dict literal per fold. The keys match the
# Results dict returned by train_and_evaluate_model; the values appear to have
# been pasted from a previous run's printed output (NOTE(review): confirm which
# run produced them). These are bare expressions: nothing is bound to a name,
# and only the last dict is displayed as the cell's output.
#fold_1
{'ae1_loss': 0.04122388821381789, 'ae1_mse': 0.041223887, 'ae1_mae': 0.16235116, 'ae2_loss': 0.00983807473228528, 
 'ae2_mse': 0.009838074, 'ae2_mae': 0.0044339993, 
 'mlp_loss': 2.2273578490966406, 'mlp_accuracy': 0.69871795}
#fold_2
{'ae1_loss': 0.041735986390939124, 'ae1_mse': 0.041735984, 'ae1_mae': 0.16299812, 'ae2_loss': 0.00568302097515418, 
 'ae2_mse': 0.0056830207, 'ae2_mae': 0.002652438, 
 'mlp_loss': 2.457437793413798, 'mlp_accuracy': 0.6923077}
#fold_3
{'ae1_loss': 0.041662152856588364, 'ae1_mse': 0.041662153, 'ae1_mae': 0.16314024, 'ae2_loss': 0.003184114332095935,
 'ae2_mse': 0.0031841146, 'ae2_mae': 0.0019735892, 
 'mlp_loss': 2.4281914356427317, 'mlp_accuracy': 0.6602564}
#fold_4
{'ae1_loss': 0.04324208257290033, 'ae1_mse': 0.043242082, 'ae1_mae': 0.16591014, 'ae2_loss': 0.0021350136420761165,
 'ae2_mse': 0.0021350137, 'ae2_mae': 0.0016863676, 
 'mlp_loss': 2.5261729863973765, 'mlp_accuracy': 0.67948717}
#fold_5
{'ae1_loss': 0.04028723560846769, 'ae1_mse': 0.040287238, 'ae1_mae': 0.16018154, 'ae2_loss': 0.0011480520512962427, 
 'ae2_mse': 0.001148052, 'ae2_mae': 0.0016268685, 
 'mlp_loss': 2.4056292191529884, 'mlp_accuracy': 0.6858974}
#fold_6
{'ae1_loss': 0.04346959274804708, 'ae1_mse': 0.04346959, 'ae1_mae': 0.1668984, 'ae2_loss': 0.0031298403311701656, 
 'ae2_mse': 0.0031298404, 'ae2_mae': 0.0025571962, 
 'mlp_loss': 2.1884971276307716, 'mlp_accuracy': 0.69871795}
#fold_7
{'ae1_loss': 0.04105956664363158, 'ae1_mse': 0.04105957, 'ae1_mae': 0.16167267, 'ae2_loss': 0.0086138850819428,
 'ae2_mse': 0.008613885, 'ae2_mae': 0.0026781328, 
 'mlp_loss': 2.5758261619470058, 'mlp_accuracy': 0.6602564}
#fold_8
{'ae1_loss': 0.042440927542239715, 'ae1_mse': 0.04244093, 'ae1_mae': 0.16471367, 'ae2_loss': 0.006808891734198749,
 'ae2_mse': 0.006808892, 'ae2_mae': 0.0036072638, 
 'mlp_loss': 2.1458789446415047, 'mlp_accuracy': 0.6474359}
#fold_9
{'ae1_loss': 0.04173218314219447, 'ae1_mse': 0.04173218, 'ae1_mae': 0.16241072, 'ae2_loss': 0.0006675669974003222,
 'ae2_mse': 0.00066756696, 'ae2_mae': 0.0012192972, 
 'mlp_loss': 2.428297372964712, 'mlp_accuracy': 0.6474359}
#fold_10
{'ae1_loss': 0.042473170761633844, 'ae1_mse': 0.04247317, 'ae1_mae': 0.16484436, 'ae2_loss': 0.003876802728520435, 
 'ae2_mse': 0.0038768027, 'ae2_mae': 0.0022777137, 
 'mlp_loss': 2.3337640212132382, 'mlp_accuracy': 0.7051282}

In [None]:
import numpy as np
# Per-fold MLP accuracies (fractions in [0, 1]), in fold order 1..10.
fold_accuracies=np.array([0.69871795,0.6923077,0.6602564,0.67948717,0.6858974,0.69871795,0.6602564,0.6474359,0.6474359,0.7051282])
# Mean accuracy as a percentage; np.mean divides by the actual number of
# folds instead of a hard-coded 10.
avg_acc=np.mean(fold_accuracies)*100
print(avg_acc)