In [1]:
# General libraries
import pandas as pd
import numpy as np
from collections import defaultdict
import random
from random import randrange
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Libraries for developing a Neural Network
import tensorflow as tf 
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.utils import to_categorical 

In [2]:
dataset = pd.read_csv("../data_models/dataset.csv", sep='\t')

In [3]:
# Shuffle the data set
dataset = dataset.sample(frac=1).reset_index(drop=True)
# Filling NaN values with zeros. Not sure if it is correct.
dataset['Com_Followings_Ratio'] = dataset['Com_Followings_Ratio'].fillna(0)

In [37]:
msk = np.random.rand(len(dataset)) < 0.8

Y = dataset['Real']
X = dataset.drop(columns=['Real','Sink','Source'])

# Normalization
X = (X-X.min())/(X.max()-X.min())

X_train = X[msk]
X_test = X[~msk]
Y_train = Y[msk]
Y_test = Y[~msk]

In [57]:
# Model arguments
args = dict(x = X_train,
            y = Y_train,
            epochs=100,
            validation_split=0.2,
            verbose=2,
            shuffle=True)

# Layer definition
input_layer = Input(shape=(6,))
hidden_layer_1 = Dense(16, activation='relu')(input_layer)
#hidden_layer_1 = Dropout(0.3)(hidden_layer_1)
#hidden_layer_2 = Dense(8, activation='sigmoid')(hidden_layer_1)
hidden_layer_2 = Dense(8, activation='relu')(hidden_layer_1)
#hidden_layer_2 = Dropout(0.3)(hidden_layer_2)
output_layer = Dense(2, activation='softmax')(hidden_layer_2)
model = Model(inputs=input_layer, outputs=output_layer)

# Model set up
model.compile(tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics =['accuracy'])

model.summary()

Model: "functional_39"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_20 (InputLayer)        [(None, 6)]               0         
_________________________________________________________________
dense_57 (Dense)             (None, 16)                112       
_________________________________________________________________
dense_58 (Dense)             (None, 8)                 136       
_________________________________________________________________
dense_59 (Dense)             (None, 2)                 18        
Total params: 266
Trainable params: 266
Non-trainable params: 0
_________________________________________________________________


In [58]:
# Execute training
model.fit(**args)

Epoch 1/100
159/159 - 0s - loss: 0.6618 - accuracy: 0.6576 - val_loss: 0.6409 - val_accuracy: 0.6675
Epoch 2/100
159/159 - 0s - loss: 0.6082 - accuracy: 0.6865 - val_loss: 0.5983 - val_accuracy: 0.6926
Epoch 3/100
159/159 - 0s - loss: 0.5717 - accuracy: 0.6994 - val_loss: 0.5717 - val_accuracy: 0.7060
Epoch 4/100
159/159 - 0s - loss: 0.5483 - accuracy: 0.7181 - val_loss: 0.5541 - val_accuracy: 0.7280
Epoch 5/100
159/159 - 0s - loss: 0.5294 - accuracy: 0.7305 - val_loss: 0.5376 - val_accuracy: 0.7264
Epoch 6/100
159/159 - 0s - loss: 0.5153 - accuracy: 0.7404 - val_loss: 0.5267 - val_accuracy: 0.7390
Epoch 7/100
159/159 - 0s - loss: 0.5037 - accuracy: 0.7490 - val_loss: 0.5212 - val_accuracy: 0.7555
Epoch 8/100
159/159 - 0s - loss: 0.4977 - accuracy: 0.7577 - val_loss: 0.5145 - val_accuracy: 0.7626
Epoch 9/100
159/159 - 0s - loss: 0.4904 - accuracy: 0.7602 - val_loss: 0.5092 - val_accuracy: 0.7618
Epoch 10/100
159/159 - 0s - loss: 0.4857 - accuracy: 0.7659 - val_loss: 0.5068 - val_accura

Epoch 82/100
159/159 - 0s - loss: 0.4244 - accuracy: 0.8145 - val_loss: 0.4403 - val_accuracy: 0.8082
Epoch 83/100
159/159 - 0s - loss: 0.4258 - accuracy: 0.8137 - val_loss: 0.4391 - val_accuracy: 0.8137
Epoch 84/100
159/159 - 0s - loss: 0.4237 - accuracy: 0.8159 - val_loss: 0.4464 - val_accuracy: 0.8192
Epoch 85/100
159/159 - 0s - loss: 0.4241 - accuracy: 0.8198 - val_loss: 0.4374 - val_accuracy: 0.8129
Epoch 86/100
159/159 - 0s - loss: 0.4228 - accuracy: 0.8151 - val_loss: 0.4493 - val_accuracy: 0.7956
Epoch 87/100
159/159 - 0s - loss: 0.4219 - accuracy: 0.8147 - val_loss: 0.4366 - val_accuracy: 0.8168
Epoch 88/100
159/159 - 0s - loss: 0.4224 - accuracy: 0.8171 - val_loss: 0.4387 - val_accuracy: 0.8215
Epoch 89/100
159/159 - 0s - loss: 0.4211 - accuracy: 0.8173 - val_loss: 0.4350 - val_accuracy: 0.8184
Epoch 90/100
159/159 - 0s - loss: 0.4199 - accuracy: 0.8177 - val_loss: 0.4347 - val_accuracy: 0.8168
Epoch 91/100
159/159 - 0s - loss: 0.4195 - accuracy: 0.8177 - val_loss: 0.4356 - v

<tensorflow.python.keras.callbacks.History at 0x141e54430>

In [59]:
# Prediction for test
predict = model.predict(X_test)
classes = predict.argmax(axis=-1)

In [60]:
print(classes)
print('Accuracy: ' + "{:.4f}".format(accuracy_score(Y_test, classes)))

[0 0 0 ... 0 1 1]
Accuracy: 0.8218


In [61]:
print(predict)

[[0.7659865  0.23401344]
 [0.62242365 0.37757638]
 [0.8159179  0.18408205]
 ...
 [0.74911624 0.25088376]
 [0.07113406 0.92886597]
 [0.14539103 0.85460895]]


In [62]:
# accuracy 
#print (classification_report(Y_test, predict,digits = 6))
#print (confusion_matrix(Y_test, predict))
#print (accuracy_score(Y_test, predict))

In [65]:
# Reading test data
test_data = pd.read_csv("../data_models/test_data.csv", sep='\t')
test_data['Com_Followings_Ratio'] = test_data['Com_Followings_Ratio'].fillna(0)
# Normalization
test_data = (test_data-test_data.min())/(test_data.max()-test_data.min())
test_data = test_data.drop(columns=['Sink','Source'])

In [66]:
predict_test = model.predict(test_data)
predict_test 

array([[0.16000684, 0.8399931 ],
       [0.33275717, 0.6672429 ],
       [0.2162076 , 0.78379244],
       ...,
       [0.6963033 , 0.30369675],
       [0.6458285 , 0.3541715 ],
       [0.70154905, 0.29845092]], dtype=float32)

In [67]:
classes = predict_test.argmax(axis=-1)
classes

array([1, 1, 1, ..., 0, 0, 0])

In [68]:
prediction = pd.DataFrame([])
i = 0
for row in predict_test:
    i += 1
    prediction = prediction.append(pd.DataFrame({'Id': i, 'Predicted': row[1]},
                                                index=[0]), ignore_index=True) 
prediction.to_csv("../predictions/prediction_2020-09-12.csv", sep=',', index=False)