In [57]:
# Libraries for developing a Neural Network
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.utils import to_categorical 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
from sklearn.preprocessing import StandardScaler
from keras.losses import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
sns.set_style('darkgrid')
plt.rcParams['figure.dpi'] = 108

from collections import defaultdict
import random
from random import randrange
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


from sklearn.metrics import roc_auc_score

In [58]:
dt = pd.read_csv("model_data/data.csv",sep ="\t")

dt['Real']=dt.apply(lambda x: x.Real if x.Real ==1 else 0,axis =1)
y = dt.Real
test_dt =pd.read_csv("model_data/test_data.csv",sep ="\t")
ds = dt.drop('Real',axis =1)

In [59]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(ds, y, test_size=0.2, random_state=1)
print("Training set has {} instances. Test set has {} instances.".format(X_train.shape[0], X_test.shape[0]))

Training set has 6364 instances. Test set has 1591 instances.


In [60]:
poly =PolynomialFeatures(2,include_bias = False)
poly.fit(X_train)
X_train= poly.transform(X_train)
X_test= poly.transform(X_test)
test_dt = poly.transform(test_dt)

In [61]:
# standard
scaler = StandardScaler()
scaler.fit(X_train)
X_train= scaler.transform(X_train)
X_test= scaler.transform(X_test)
#dt = scaler.transform(dt)
test_dt = scaler.transform(test_dt)
#dt['Real'] =dt.apply(lambda x: 0 if x.Real ==-1 else 1, axis =1)


In [62]:
def auroc(y_true, y_pred):
    return tf.py_function(roc_auc_score, (y_true, y_pred), tf.double)


In [68]:
# Model arguments
args = dict(x = X_train,
            y = y_train,
            epochs=50,
            validation_split=0.2,
            verbose=2,
            shuffle=True)

input_layer = Input(shape=(20,))
hidden_layer_1 = Dense(100, activation='relu', units=64,
    kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
    bias_regularizer=regularizers.l2(1e-4),activity_regularizer=regularizers.l2(12e-5))(input_layer)

#hidden_layer_1 = Dropout(0.3)(hidden_layer_1)
#hidden_layer_2 = Dense(8, activation='sigmoid')(hidden_layer_1)
hidden_layer_2 = Dense(50, activation='relu')(hidden_layer_1)
hidden_layer_2 = Dropout(0.3)(hidden_layer_2)
output_layer = Dense(2, activation='softmax')(hidden_layer_2)
model = Model(inputs=input_layer, outputs=output_layer)

# Model set up
model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy', auroc])
model.compile(tf.keras.optimizers.RMSprop(learning_rate=0.01),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()
#model.compile(loss='categorical_crossentropy', optimizer='adam',metrics=['accuracy', auroc])

#%%

# Execute training
model.fit(**args)



Model: "model_12"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_13 (InputLayer)        [(None, 20)]              0         
_________________________________________________________________
dense_36 (Dense)             (None, 100)               2100      
_________________________________________________________________
dense_37 (Dense)             (None, 50)                5050      
_________________________________________________________________
dropout_9 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_38 (Dense)             (None, 2)                 102       
Total params: 7,252
Trainable params: 7,252
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
160/160 - 0s - loss: 0.4932 - accuracy: 0.7535 - val_loss: 0.3952 - val_accuracy: 0.8610
Epoch 2/50
160/160 - 0

<tensorflow.python.keras.callbacks.History at 0x7fdfe419b410>

In [69]:


# Prediction for test
predict = model.predict(X_test)
classes = []
classes = predict.argmax(axis=-1)

#%%

print('Accuracy: ' + "{:.4f}".format(accuracy_score(y_test, classes)))
print(classes)

#%%

print(predict)

#%%

auc = roc_auc_score(y_test, classes)
print(auc)


Accuracy: 0.9158
[1 1 1 ... 1 1 1]
[[0.0000000e+00 1.0000000e+00]
 [2.0766181e-01 7.9233819e-01]
 [3.2057233e-08 1.0000000e+00]
 ...
 [2.0106381e-05 9.9997985e-01]
 [2.3658115e-01 7.6341885e-01]
 [1.6746019e-06 9.9999833e-01]]
0.9160401002506265


In [70]:
predict_test = model.predict(test_dt)
print(predict_test)

[[0.34621134 0.6537887 ]
 [0.1775368  0.8224632 ]
 [0.10879969 0.8912003 ]
 ...
 [0.14160302 0.85839695]
 [0.2523072  0.7476928 ]
 [0.99181926 0.00818073]]


In [71]:
classes = []
classes = predict_test.argmax(axis=-1)


#%%

prediction = pd.DataFrame([])
i = 0
for row in predict_test:
    i += 1
    prediction = prediction.append(pd.DataFrame({'Id': i, 'Predicted': row[1]},
                                                index=[0]), ignore_index=True) 
prediction.to_csv("predictions/result5.csv", sep=',', index=False)
