In [None]:
import random
import sys
import math

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
import keras
import seaborn as sns
from math import floor, ceil
from pylab import rcParams
from sklearn import svm

%matplotlib inline

In [None]:
# Shared feature scaler. It is created unfitted here; the data-preparation
# cell below fits it and uses it to standardize the feature columns.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [None]:
# Load the peptide descriptor table, keep only usable columns, split it into
# train/test partitions and standardize the feature columns.
file_name = "AntimicrobialTrainingSequences.csv"
df = pd.read_csv(file_name, low_memory=False)

target = "Antimicrobial"

test_proportion = 0.2
# Fixed seed so that Restart & Run All reproduces the same train/test split.
split_seed = 42
bool_target = "bool_{}".format(target)

# Keep the target column plus every column that has no missing values.
keep_column = (df.columns == target) | df.notnull().all().to_numpy()
col_list = df.columns[keep_column]
df = df.loc[:, keep_column]

features = ["InterfaceHydrophobicity", "OctanolHydrophobicity", "GRAVY", "TotalCharge",
            "TotalPositiveCharge", "TotalNegativeCharge", "AveragePositivePosition", "AverageNegativePosition",
            "Weight", "hmol", "hmol_pos", "hmol_neg", "smol_pos", "smol_neg", "ave_smol", "ave_hmol_pos",
            "ave_hmol_neg", "ave_smol_pos", "ave_smol_neg", "hmol_pos_largest", "hmol_neg_largest",
            "hmol_pos_smallest", "hmol_neg_smallest", "eo_pos", "sigma", "stericmol_largest", "stericmol_smallest",
            "stericatom_largest", "stericatom_smallest"]

# A feature column that contained nulls was dropped by the filter above; fail
# with a message that names it instead of an opaque indexing error further down.
missing_features = [name for name in features if name not in df.columns]
if missing_features:
    raise KeyError(
        "Feature columns missing from {} (absent, or dropped because they contain nulls): {}".format(
            file_name, missing_features
        )
    )

# Decide the split BEFORE scaling: each row lands in the test partition with
# probability `test_proportion`. A private Random instance keeps the global
# `random` state untouched.
split_rng = random.Random(split_seed)
is_test = np.array(
    [split_rng.random() < test_proportion for _ in range(len(df))],
    dtype=bool,
)

# Fit the scaler on the training rows only, then apply that same transform to
# every row. Fitting on the full frame would leak test-set mean/variance into
# training and make the test metrics optimistic.
scaler.fit(df.loc[~is_test, features].to_numpy())
df[features] = scaler.transform(df[features].to_numpy())

inputX = df[features]
test = df.loc[is_test, :]
train = df.loc[~is_test, :]
train_X = train[features]
train_y = train[[target]]
test_X = test[features]
test_y = test[[target]]

train_len = len(train)
test_len = len(test)

num_features = len(train_X.columns)

In [None]:
# Display summary statistics of the training labels (the cell's last
# expression is rendered as its output).
train_y.describe()

In [None]:
# Convert the four DataFrame partitions into float NumPy arrays for Keras.
train_X, train_y, test_X, test_y = (
    np.array(partition).astype(float)
    for partition in (train_X, train_y, test_X, test_y)
)

In [None]:
"""
classifier = svm.SVC(gamma=0.001)
print(train_y.shape)
classifier.fit(train_X, train_y) 
"""
from keras.models import Sequential
from keras.layers import Dense
from keras import optimizers

In [None]:
# Create the (initially empty) Keras Sequential model; its layers are added in
# the model-building cell.
classifier = Sequential()

In [None]:
# build the model
# Start from a fresh Sequential model so that re-running this cell rebuilds the
# same two-layer network instead of appending another pair of layers to a model
# that already has them.
classifier = Sequential()

# Hidden-layer weights are drawn uniformly from [-1, 1].
init = keras.initializers.RandomUniform(minval=-1, maxval=1)

#First Hidden Layer: 16 ReLU units, one input per feature column
classifier.add(Dense(16, activation='relu', kernel_initializer=init, input_dim=len(features)))
#Output Layer: a single sigmoid unit (output in 0..1) for the binary target
classifier.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))

In [None]:
#Compiling the neural network
# The learning rate is passed positionally on purpose: the keyword is `lr` in
# older Keras releases and `learning_rate` in newer ones (where `lr` is
# deprecated or rejected), while the first positional parameter is the learning
# rate in both.
adam = optimizers.Adam(0.001)
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as an additional metric.
classifier.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the network on the training partition. The held-out test partition is
# supplied as validation data, so the recorded validation curves in `history`
# are measured on the test split.
history = classifier.fit(
    x=train_X,
    y=train_y,
    batch_size=400,
    epochs=400,
    validation_data=(test_X, test_y),
)

In [None]:
# Score the trained model on the training partition and display the result.
eval_model=classifier.evaluate(train_X, train_y)
eval_model
# displayed value: loss and accuracy (the compiled loss followed by the 'accuracy' metric)

In [None]:
# Score the trained model on the held-out test partition and display the result.
# Note: this reuses the name `eval_model`, replacing the training-partition value.
eval_model=classifier.evaluate(test_X, test_y)
eval_model
# displayed value: loss and accuracy (the compiled loss followed by the 'accuracy' metric)

In [None]:
# list all data in history
print(history.history.keys())

# The accuracy metric is logged as 'acc'/'val_acc' by older Keras releases and
# as 'accuracy'/'val_accuracy' by newer ones; use whichever key is present so
# the plot does not fail with a KeyError.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# summarize history for accuracy
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# 'test' is the validation curve: the test partition was passed as validation data.
plt.legend(['train', 'test'], loc='upper left')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Turn the model's sigmoid outputs into boolean class predictions by
# thresholding at 0.5.
y_pred = classifier.predict(test_X) > 0.5      # test partition
y_pred1 = classifier.predict(train_X) > 0.5    # training partition

In [None]:
# Confusion matrices for both partitions.
from sklearn.metrics import confusion_matrix

# Layout of each printed matrix (rows = true class, columns = predicted class):
#   TN FP   <- samples that are actually negative
#   FN TP   <- samples that are actually positive
cm1 = confusion_matrix(y_true=train_y, y_pred=y_pred1)
cm = confusion_matrix(y_true=test_y, y_pred=y_pred)

for _split_name, _matrix in (("Training data", cm1), ("Testing data", cm)):
    print(_split_name)
    print(_matrix)

In [None]:
def _confusion_metrics(matrix):
    """Derive summary statistics from a 2x2 confusion matrix.

    `matrix` is indexed as [[TN, FP], [FN, TP]] (rows = true class, columns =
    predicted class). Returns a dict with the four raw counts plus accuracy,
    precision, F1, sensitivity, specificity, Matthews correlation coefficient
    and the pre-test probability (prevalence of the positive class).

    A ratio whose denominator is zero is reported as NaN.
    """
    # Convert to Python ints: the MCC denominator multiplies four sums, which
    # can silently overflow fixed-width NumPy integers on large datasets.
    tn, fp = int(matrix[0][0]), int(matrix[0][1])
    fn, tp = int(matrix[1][0]), int(matrix[1][1])
    total = tp + fp + tn + fn

    def ratio(numerator, denominator):
        return numerator / denominator if denominator else float("nan")

    return {
        "tp": tp,
        "fp": fp,
        "tn": tn,
        "fn": fn,
        "acc": ratio(tp + tn, total),
        "precision": ratio(tp, tp + fp),
        # F1 is the harmonic mean of precision and sensitivity:
        # 2*TP / (2*TP + FP + FN). TP / (TP + FP) alone is precision.
        "F1": ratio(2 * tp, 2 * tp + fp + fn),
        "sens": ratio(tp, tp + fn),
        "spec": ratio(tn, tn + fp),
        "mcc": ratio(
            tp * tn - fp * fn,
            math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)),
        ),
        "pretestProb": ratio(tp + fn, total),
    }


def _print_metrics(title, stats):
    """Print one partition's metrics under the heading `title`."""
    print()
    print(title)
    print("acc:", stats["acc"])
    print("F1:", stats["F1"])
    print("precision:", stats["precision"])
    print("sens:", stats["sens"])
    print("spec:", stats["spec"])
    print("mcc:", stats["mcc"])
    print("pretestProb:", stats["pretestProb"])


# Training partition: keep the original module-level names for later cells.
_train_stats = _confusion_metrics(cm1)
tp1, fp1, tn1, fn1 = (_train_stats[key] for key in ("tp", "fp", "tn", "fn"))
F1a = _train_stats["F1"]
acc1 = _train_stats["acc"]
sens1 = _train_stats["sens"]
spec1 = _train_stats["spec"]
mcc1 = _train_stats["mcc"]
pretestProb1 = _train_stats["pretestProb"]
_print_metrics("Training data", _train_stats)


# Test partition.
_test_stats = _confusion_metrics(cm)
tp, fp, tn, fn = (_test_stats[key] for key in ("tp", "fp", "tn", "fn"))
acc = _test_stats["acc"]
F1 = _test_stats["F1"]
sens = _test_stats["sens"]
spec = _test_stats["spec"]
mcc = _test_stats["mcc"]
pretestProb = _test_stats["pretestProb"]
_print_metrics("Testing data", _test_stats)