In [None]:
# Attach Google Drive to the Colab filesystem so the dataset and the saved
# model can be addressed under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import pandas as pd
import tensorflow as tf
from keras.utils import to_categorical
from sklearn.metrics import average_precision_score, precision_recall_fscore_support, accuracy_score, roc_curve, roc_auc_score
import numpy as np
import keras
import matplotlib.pyplot as plt

In [None]:
def save_model(model, path):
    """Write `model` to `path` in TensorFlow SavedModel format.

    Args:
        model: The trackable object (here, the trained Keras model) to export.
        path: Target directory handed to `tf.saved_model.save`.
    """
    tf.saved_model.save(obj=model, export_dir=path)
    # Optional TFLite export (currently disabled):
    #   converter = tf.lite.TFLiteConverter.from_saved_model(path)
    #   tflite_model = converter.convert()
    #   open(path+".tflite", "wb+").write(tflite_model)

In [None]:
# Dataset location on the mounted Google Drive.
drive_path = "/content/drive/My Drive/Promoter Finding Algorithm/Homo Sapiens/"

train_file_path = drive_path + "train_data.csv"

# Layout of the CSV and of the encoded tensors.
SEQUENCE_LENGTH = 151  # number of feature columns expected per row
NUM_BASES = 4          # one-hot depth used for every sequence position
NUM_CLASSES = 2        # one-hot depth used for the label
TRAIN_FRACTION = 0.8   # leading share of rows used for training


def _labels_and_one_hot(frame):
    """Split `frame` into labels and one-hot encoded features.

    Column 0 is taken as the label and every remaining column as one sequence
    position (presumably integer codes 0-3 -- confirm against the CSV).
    `frame` itself is not modified.

    Returns:
        (labels, labels_one_hot, features_one_hot), where `labels` is the raw
        label column and the other two are `to_categorical` encodings.

    Raises:
        ValueError: if the number of feature columns is not SEQUENCE_LENGTH.
    """
    labels = frame.iloc[:, 0]
    features = frame.iloc[:, 1:]
    if features.shape[1] != SEQUENCE_LENGTH:
        raise ValueError(
            "expected %d feature columns, found %d"
            % (SEQUENCE_LENGTH, features.shape[1])
        )
    features_one_hot = to_categorical(
        features.values.reshape(features.shape[0], SEQUENCE_LENGTH, 1),
        num_classes=NUM_BASES,
    )
    labels_one_hot = to_categorical(labels, num_classes=NUM_CLASSES)
    return labels, labels_one_hot, features_one_hot


df = pd.read_csv(train_file_path)

# Positional split: the first TRAIN_FRACTION of the rows train the model and
# the remainder is held out for the final evaluation. Rows are not shuffled.
split_data = int(df.shape[0] * TRAIN_FRACTION)
train_labels, train_labels_cat, train_data = _labels_and_one_hot(df.iloc[:split_data])
test_labels, test_labels_cat, test_data = _labels_and_one_hot(df.iloc[split_data:])

In [None]:
# Both schedule callbacks watch validation accuracy, where larger is better.
_monitor_settings = dict(monitor="val_categorical_accuracy", mode="max")

# Stop training once the monitored metric has not improved for 5 epochs.
early = tf.keras.callbacks.EarlyStopping(patience=5, verbose=1, **_monitor_settings)
# Lower the learning rate after 3 epochs without improvement.
redonplat = tf.keras.callbacks.ReduceLROnPlateau(patience=3, verbose=2, **_monitor_settings)
# Write TensorBoard event files to the local 'logs' directory.
tensorboard_logger = tf.keras.callbacks.TensorBoard('logs')

callbacks_list = [early, redonplat, tensorboard_logger]

In [None]:
def _conv_stage(inputs, filters):
    """Apply one convolutional stage: (Conv1D -> BatchNorm) x 2 -> AvgPool -> Dropout."""
    x = inputs
    for _ in range(2):
        # NOTE(review): the string 'L1L2' is resolved by Keras into an L1L2
        # regularizer built with its default arguments -- confirm those
        # defaults give the penalty strength that was intended.
        x = tf.keras.layers.Conv1D(
            filters=filters,
            kernel_size=3,
            activation=tf.nn.relu6,
            strides=1,
            kernel_regularizer='L1L2',
        )(x)
        x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.AvgPool1D(pool_size=2)(x)
    return tf.keras.layers.Dropout(0.5)(x)


def get_compiled_model(sequence_length=151, alphabet_size=4):
    """Build and compile the CNN + bidirectional-LSTM classifier.

    The network stacks four convolutional stages (32, 64, 128 and 256 filters),
    flattens the result, passes it through a bidirectional LSTM as a
    single-step sequence, and ends in a 1024-unit dense layer followed by a
    2-way softmax. The model summary is printed as a side effect.

    Args:
        sequence_length: Number of positions in each input sequence. Every
            stage shortens the sequence (two unpadded convolutions plus a
            pooling step), so it must be long enough to survive four stages.
        alphabet_size: Size of the one-hot vector at each position.

    Returns:
        A compiled `tf.keras.Model` taking inputs of shape
        (batch, sequence_length, alphabet_size).
    """
    input_layer = tf.keras.layers.Input(
        batch_shape=(None, sequence_length, alphabet_size), name='input'
    )

    features = input_layer
    for filters in (32, 64, 128, 256):
        features = _conv_stage(features, filters)

    # Collapse the convolutional output into one vector, then present it to
    # the LSTM as a sequence containing exactly one time step.
    flat_1 = tf.keras.layers.Flatten()(features)
    res_1 = tf.keras.layers.Reshape((1, flat_1.shape[1]))(flat_1)
    lstm_1 = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(units=1024, activation='relu', return_sequences=False)
    )(res_1)
    flat_2 = tf.keras.layers.Flatten()(lstm_1)

    dense_1 = tf.keras.layers.Dense(units=1024, activation='relu')(flat_2)
    dense_2 = tf.keras.layers.Dense(2, activation='softmax')(dense_1)

    model = tf.keras.models.Model(inputs=input_layer, outputs=dense_2)
    model.compile(
        optimizer=tf.optimizers.Adam(0.001),
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=['categorical_accuracy'],
    )
    model.summary()
    return model

In [None]:
# Build a freshly initialised, compiled network.
model = get_compiled_model()
# Alternative (disabled): load a previously saved model instead of building one.
# model = tf.keras.models.load_model("model")

In [None]:
# Train for at most 100 epochs with 20% of the training arrays held out for
# validation; the callbacks may stop training early or lower the learning rate.
model.fit(
    train_data,
    train_labels_cat,
    batch_size=32,
    epochs=100,
    validation_split=0.20,
    shuffle=True,
    callbacks=callbacks_list,
)

In [None]:
# Turn the per-class outputs into hard class indices, then score them
# against the held-out labels.
class_outputs = model.predict(test_data)
pred_test = class_outputs.argmax(axis=1)
accuracy = accuracy_score(test_labels, pred_test)
print("Accuracy: ", accuracy)

In [None]:
# Two-class confusion matrix of true labels versus predicted class indices.
cf_m = tf.math.confusion_matrix(
    labels=test_labels,
    predictions=pred_test,
    num_classes=2,
)
print(cf_m)

In [None]:
# ROC analysis needs a continuous score per sample. Thresholded class indices
# would collapse the curve to a single operating point, so use the softmax
# output for class 1 (the one-hot position of label 1) as the score.
positive_class_scores = model.predict(test_data)[:, 1]
r_c = roc_curve(test_labels, positive_class_scores)
roc_sc = roc_auc_score(test_labels, positive_class_scores)

In [None]:
# Show the ROC curve arrays followed by the AUC score, one per line.
print(r_c, roc_sc, sep="\n")

In [None]:
# Persist the trained model next to the dataset on Google Drive.
save_model(model, drive_path + "model_homo-sapiens")

In [None]:
# Despite its name, `recall` holds the whole result tuple, which starts with
# precision, recall and F-score in that order.
recall = precision_recall_fscore_support(test_labels, pred_test, average="binary")

# Pair each report line with the matching leading entry of the tuple.
report_templates = ('Precision: {0:0.2f}', 'Recall: {0:1.2f}', 'F1-Score: {0:2.2f}')
for template, score in zip(report_templates, recall):
    print(template.format(score))


In [None]:
# Display the first one-hot encoded training example as a sanity check.
train_data[0]