In [None]:
# Attach Google Drive to the Colab VM; the dataset CSVs and the saved model
# files used below are read from / written to paths under this mount point.
from google.colab import drive

drive.mount("/content/gdrive")

Mounted at /content/gdrive


In [None]:
import tensorflow as tf

# Sanity check that the runtime exposes a GPU to TensorFlow. The captured
# output for this cell is '/device:GPU:0'.
gpu_name = tf.test.gpu_device_name()
gpu_name

'/device:GPU:0'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage.filters import uniform_filter1d, gaussian_filter
from sklearn.metrics import accuracy_score, precision_score, recall_score, \
                            confusion_matrix, fbeta_score, precision_recall_curve, \
                            average_precision_score, auc
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten,Input,  Activation
from keras.models import load_model, Model,Sequential,model_from_json
from keras.layers import LSTM, Reshape,GRU
from keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')
from inspect import signature
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import cv2

In [None]:
def _standardize_rows(x):
    """Scale each row of a 2-D array to zero mean and unit standard deviation.

    Rows whose standard deviation is exactly zero are only mean-centred
    (divided by 1) so they become all zeros instead of NaN/inf.
    """
    row_mean = np.mean(x, axis=1).reshape(-1, 1)
    row_std = np.std(x, axis=1).reshape(-1, 1)
    row_std = np.where(row_std == 0, 1.0, row_std)
    return (x - row_mean) / row_std


def _batch_generator(x, y, batch_size=32):
    """Yield class-balanced, shuffled mini-batches forever.

    Args:
        x: 2-D array of shape (n_samples, n_steps).
        y: 2-D array of shape (n_samples, 1) holding 0/1 labels.
        batch_size: number of samples per batch; half (rounded down) are
            drawn from label 1, the remainder from label 0.

    Yields:
        (x_batch, y_batch) as freshly allocated float32 arrays of shape
        (batch_size, n_steps, 1) and (batch_size, 1).

    Raises:
        ValueError: if either class has fewer samples than its share of a batch.
    """
    n_pos = batch_size // 2
    n_neg = batch_size - n_pos

    # The label array does not change during training, so the class index
    # lists are computed once and only re-shuffled for every batch.
    pos_idx = np.where(y[:, 0] == 1)[0]
    neg_idx = np.where(y[:, 0] == 0)[0]
    if len(pos_idx) < n_pos or len(neg_idx) < n_neg:
        raise ValueError(
            "Not enough samples per class for a balanced batch of size %d "
            "(positives: %d, negatives: %d)" % (batch_size, len(pos_idx), len(neg_idx)))

    while True:
        np.random.shuffle(pos_idx)
        np.random.shuffle(neg_idx)

        batch_idx = np.concatenate((pos_idx[:n_pos], neg_idx[:n_neg]))
        # Shuffling the indices keeps samples and labels aligned while mixing
        # the two classes inside the batch.
        np.random.shuffle(batch_idx)

        # Fancy indexing + astype produce new arrays on every iteration, so a
        # batch that Keras has already queued is never overwritten by the next one.
        x_batch = x[batch_idx].reshape(len(batch_idx), x.shape[1], 1).astype('float32')
        y_batch = y[batch_idx].astype('float32')
        yield x_batch, y_batch


def _report_metrics(y_true, y_score, header):
    """Print the confusion matrix and scores, then plot the precision-recall curve.

    Args:
        y_true: ground-truth 0/1 labels, shape (n,) or (n, 1).
        y_score: predicted probabilities for the positive class, shape (n,).
        header: line printed before the confusion matrix.
    """
    y_true = np.reshape(y_true, len(y_true))
    pred = np.where(y_score >= 0.5, 1, 0)  # hard labels at a 0.5 threshold

    print(header)
    conf_matrix = pd.crosstab(y_true, pred)
    print(conf_matrix)

    accuracy = accuracy_score(y_true, pred)
    precision = precision_score(y_true, pred)
    recall = recall_score(y_true, pred)
    # `beta` must be passed by keyword: it is keyword-only in current scikit-learn.
    fbeta = fbeta_score(y_true, pred, beta=1)
    print('Accuracy: %.3f Precision: %.3f Recall: %.3f F_beta: %.3f' % (accuracy, precision, recall, fbeta))

    precision, recall, thresholds = precision_recall_curve(y_true, y_score, pos_label=1)
    auc_pr = auc(recall, precision)
    print('Area under precision-recall-curve: %.3f' % (auc_pr))
    # Only pass `step` to fill_between when the installed matplotlib accepts it.
    step_kwargs = ({'step': 'post'} if 'step' in signature(plt.fill_between).parameters else {})
    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, alpha=0.2, color='b', **step_kwargs)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall Curve')
    plt.show()


def main():
    """Train a stacked-LSTM binary classifier on the exoTrain/exoTest CSVs.

    Steps: load both CSVs from Google Drive, append time-reversed copies of the
    leading rows, standardise every row, train a three-layer LSTM on
    class-balanced batches (5 silent warm-up epochs at lr 1e-5, then 50 epochs
    at lr 4e-5), save the architecture (JSON) and weights (HDF5) to Drive, plot
    the training curves, and report metrics on the training and test sets.
    """
    batch_size = 32

    print("Loading datasets...")
    train = pd.read_csv("/content/gdrive/My Drive/exoTrain.csv", encoding="ISO-8859-1")
    test = pd.read_csv("/content/gdrive/My Drive/exoTest.csv", encoding="ISO-8859-1")

    x_train = np.array(train.drop('LABEL', axis=1))
    x_test = np.array(test.drop('LABEL', axis=1))
    # Shift labels down by one; the batch generator selects on 0 and 1
    # (presumably the CSV stores 1/2 -- confirm against the data).
    y_train = np.array(train.LABEL).reshape((-1, 1)) - 1
    y_test = np.array(test.LABEL).reshape((-1, 1)) - 1

    # Augmentation: append time-reversed copies of the first 37 training rows and
    # the first 5 test rows, each with its own label (presumably these leading
    # rows are the positive class -- verify against the CSV ordering).
    # NOTE(review): the *test* set is augmented too, so the reported test metrics
    # include synthetic samples -- confirm this is intended.
    x_train = np.append(x_train, np.flip(x_train[0:37, :], axis=-1), axis=0)
    y_train = np.append(y_train, y_train[0:37]).reshape((-1, 1))
    x_test = np.append(x_test, np.flip(x_test[0:5, :], axis=-1), axis=0)
    y_test = np.append(y_test, y_test[0:5]).reshape((-1, 1))

    x_train = _standardize_rows(x_train)
    x_test = _standardize_rows(x_test)
    # The LSTM expects (samples, timesteps, features); add the trailing feature axis.
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], -1)
    print("x_test shape",x_test.shape)
    print("y_test",y_test.shape)

    model = Sequential()
    # Sequence length is taken from the data instead of being hard-coded.
    model.add(LSTM(100, input_shape=(x_train.shape[1], 1), return_sequences=True))
    model.add(Dropout(0.1))
    model.add(LSTM(100, return_sequences=True))
    model.add(Dropout(.1))
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(.1))
    model.add(Dense(1, activation="sigmoid"))

    steps_per_epoch = x_train.shape[0] // batch_size

    # Stage 1: short silent warm-up at a low learning rate.
    # Model.fit accepts Python generators directly; fit_generator is deprecated.
    model.compile(optimizer=Adam(1e-5), loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(_batch_generator(x_train, y_train, batch_size),
              validation_data=(x_test, y_test), verbose=0, epochs=5,
              steps_per_epoch=steps_per_epoch)

    # Stage 2: main training run; recompiling installs a fresh optimizer with the
    # higher learning rate. Only this stage's history is plotted.
    model.compile(optimizer=Adam(4e-5), loss='binary_crossentropy', metrics=['accuracy'])
    hist = model.fit(_batch_generator(x_train, y_train, batch_size),
                     validation_data=(x_test, y_test), verbose=2, epochs=50,
                     steps_per_epoch=steps_per_epoch)

    # Persist architecture and weights separately so a later cell can rebuild the model.
    model_json = model.to_json()
    with open("/content/gdrive/My Drive/LSTM_model.json", "w") as json_file:
        json_file.write(model_json)
    model.save_weights("/content/gdrive/My Drive/LSTM_model.h5")
    print("Saved model to disk")

    plt.plot(hist.history['loss'], color='b', label='loss')
    plt.plot(hist.history['val_loss'], color='r', label='validation loss')
    plt.title('Loss')
    plt.xlabel('Epochs')
    plt.legend(loc='upper right')
    plt.show()
    plt.plot(hist.history['accuracy'], color='b', label='accuracy')
    plt.plot(hist.history['val_accuracy'], color='r', label='validation accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.legend(loc='upper right')
    plt.show()

    print("Make predictions for training data")
    y_pred = model.predict(x_train.reshape(x_train.shape[0], x_train.shape[1], -1))[:, 0]
    print("Create confusion matrix for training data")
    _report_metrics(y_train, y_pred, 'Validation for training data:')

    y_pred = model.predict(x_test)[:, 0]
    _report_metrics(y_test, y_pred, 'Validation for test data:')

In [None]:
# Run the full pipeline (load data, train, save, evaluate) when this cell or
# script is executed directly rather than imported.
if __name__ == "__main__":
    print("In main")
    main()

In main
Loading datasets...
x_test shape (575, 3197, 1)
y_test (575, 1)
Epoch 1/50
160/160 - 58s - loss: 0.6589 - accuracy: 0.5992 - val_loss: 0.6568 - val_accuracy: 0.7739
Epoch 2/50
160/160 - 53s - loss: 0.6548 - accuracy: 0.5992 - val_loss: 0.6340 - val_accuracy: 0.7948
Epoch 3/50


In [None]:
# Rebuild the trained network from disk: the architecture comes from the JSON
# file and the learned parameters from the HDF5 weights file.
# The context manager closes the file even if reading raises.
with open('/content/gdrive/My Drive/LSTM_model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights("/content/gdrive/My Drive/LSTM_model.h5")
print("Loaded model from disk")
loaded_model.summary()

Loaded model from disk
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 3197, 100)         40800     
_________________________________________________________________
dropout (Dropout)            (None, 3197, 100)         0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 3197, 100)         80400     
_________________________________________________________________
dropout_1 (Dropout)          (None, 3197, 100)         0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               80400     
_________________________________________________________________
dropout_2 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense (Dense)                (Non