In [None]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, BatchNormalization, Dropout
from tensorflow.keras import optimizers
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd 
import seaborn as sns 
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report, f1_score

In [None]:
# Seed every random generator the notebook draws from so that a fresh
# Restart & Run All reproduces the same shuffle, split and training run.
# DataFrame.sample and train_test_split are called below without a
# random_state, in which case they use NumPy's global generator; Keras uses
# TensorFlow's generator.
SEED = 13
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.debugging.set_log_device_placement(False)

In [None]:
# GLOBAL: CSV file that is read as the full, already processed dataset
path = '/content/drive/MyDrive/Colab Notebooks/Second semester/Data/full_dataset.csv'

In [None]:
# Load the processed dataset, shuffle all of its rows, then show (rows, columns)
all_ds = pd.read_csv(path).sample(frac=1)
all_ds.shape

In [None]:
# Remove the index column that came in with the CSV.
# drop(..., errors='ignore') keeps this cell re-runnable (pop() raises KeyError
# once the column is gone) and does not echo the removed column as cell output.
all_ds = all_ds.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Number of rows per subcategory; missing values get their own entry
subcategory_counts = all_ds['subcategory'].value_counts(dropna=False)
subcategory_counts

In [None]:
# Partition the rows into train / test / validation sets (0.8 / 0.1 / 0.1):
# first hold out 20% of the data, then cut that hold-out in half.
holdout_fraction = 0.2
train_dataset, temp_test_dataset = train_test_split(
    all_ds, test_size=holdout_fraction
)
validation_share_of_holdout = 0.5
test_dataset, valid_dataset = train_test_split(
    temp_test_dataset, test_size=validation_share_of_holdout
)

In [None]:
# Summary statistics of the training split as produced by describe(), with the
# label column removed and the table transposed so each feature is one row
train_stats = train_dataset.describe().drop(columns="subcategory").transpose()

In [None]:
# Move the label column out of each split: the frames keep only the x data and
# the removed column becomes that split's label series
label_column = 'subcategory'
train_labels1 = train_dataset.pop(label_column)
test_labels1 = test_dataset.pop(label_column)
valid_labels1 = valid_dataset.pop(label_column)

In [None]:
# One-hot encode the output label of every split; the resulting columns are
# named with the 'Label' prefix
label_prefix = 'Label'
train_labels = pd.get_dummies(train_labels1, prefix=label_prefix)
valid_labels = pd.get_dummies(valid_labels1, prefix=label_prefix)
test_labels = pd.get_dummies(test_labels1, prefix=label_prefix)

In [None]:
# Global: give every one-hot label frame the same, complete set of class columns.
#
# pd.get_dummies only creates a column for a class that occurs in the data it
# is given, so a class missing from a split has to be added as an all-zero
# column. Inserting 'Label_7' whenever a frame does not have 11 columns raises
# ValueError if 'Label_7' is already present and leaves the frames misaligned
# if a different class is the one that is missing, so the frames are instead
# reindexed against the full list of expected columns.
NUM_CLASSES = 11
# NOTE(review): assumes the subcategory values are the integers
# 0..NUM_CLASSES-1 (the confusion-matrix cell compares them directly with
# argmax indices) - confirm against the dataset.
LABEL_COLUMNS = [f'Label_{class_id}' for class_id in range(NUM_CLASSES)]


def align_label_columns(labels, columns=LABEL_COLUMNS):
    """Return `labels` with exactly `columns`, in that order.

    Columns missing from `labels` are added and filled with 0. The result is
    cast to float32 so the frame has one numeric dtype even when the existing
    one-hot columns and the zero-filled ones would otherwise differ.

    Args:
        labels: one-hot encoded label DataFrame.
        columns: complete, ordered list of expected column names.

    Returns:
        A new DataFrame; `labels` itself is not modified.

    Raises:
        ValueError: if `labels` contains a column that is not in `columns`
            (reindexing would otherwise drop it silently).
    """
    expected = list(columns)
    unexpected = [col for col in labels.columns if col not in expected]
    if unexpected:
        raise ValueError(f'Unexpected label columns: {unexpected}')
    return labels.reindex(columns=expected, fill_value=0).astype('float32')


train_labels = align_label_columns(train_labels)
test_labels = align_label_columns(test_labels)
valid_labels = align_label_columns(valid_labels)
y_list = [train_labels, test_labels, valid_labels]

In [None]:
# Normalise the x data with min-max scaling based on the training statistics
def norm(x):
    """Min-max scale `x` using the per-feature min and max in `train_stats`.

    Each column of `x` is matched by name to a row of `train_stats` and mapped
    to (value - min) / (max - min).

    A feature that is constant in the training data has max == min; its range
    is replaced by 1 so the column becomes (value - min) instead of NaN/inf
    from a division by zero.
    """
    feature_min = train_stats['min']
    feature_range = (train_stats['max'] - feature_min).replace(0, 1)
    return (x - feature_min) / feature_range


normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
normed_valid_dataset = norm(valid_dataset)

In [None]:
# Build the dense classifier, then train it and time the training call
def build_model1_two_hidden_layers():
    """Build and compile the softmax classifier trained in this notebook.

    Architecture: a Dense layer with 16 units whose input size is the number
    of columns of `normed_train_data`, followed by a softmax Dense layer with
    one unit per column of `train_labels`. The model is compiled with Adam
    (learning rate 0.0001), categorical cross-entropy loss and the accuracy
    metric.

    NOTE(review): despite the function name there is a single hidden layer,
    and it has no activation, so the network is linear up to the softmax.
    Confirm whether an activation (e.g. 'relu') was intended.

    Returns:
        The compiled Sequential model.
    """
    model = Sequential()
    model.add(Dense(16, input_shape=(normed_train_data.shape[1],)))
    model.add(Dense(train_labels.shape[1], activation='softmax'))
    learning_rate = 0.0001
    optimizer = optimizers.Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model


EPOCHS = 10
batch_size = 16

model = build_model1_two_hidden_layers()
print('Here is a summary of this model: ')
model.summary()

# Start the clock immediately before fit() so that `end - start` is the
# training time and does not include building the model or printing the summary.
start = datetime.now()
with tf.device('/CPU:0'):
    # steps_per_epoch is deliberately not passed. For in-memory data Keras
    # derives the number of steps from batch_size, so each epoch covers the
    # whole training set. The explicit int(n_samples / batch_size) value ended
    # every epoch one partial batch early whenever n_samples was not a
    # multiple of batch_size.
    history = model.fit(
        normed_train_data,
        train_labels,
        batch_size=batch_size,
        epochs=EPOCHS,
        verbose=1,
        shuffle=True,
        validation_data=(normed_valid_dataset, valid_labels),
    )
end = datetime.now()

In [None]:
# Output locations: one for the saved model, one for the saved weights
savemodel = '/content/drive/MyDrive/Colab Notebooks/Second semester/Models/NN/local/model/device1'
saveweights = '/content/drive/MyDrive/Colab Notebooks/Second semester/Models/NN/local/weights/device1'

In [None]:
# Persist the trained network: the full model first, then its weights on their own
model.save(filepath=savemodel)
model.save_weights(filepath=saveweights)

In [None]:
# Show the elapsed time between the recorded start and end timestamps
training_duration = end - start
print(training_duration)

In [None]:
# Run the trained model on the normalised test features
predict_results = model.predict(x=normed_test_data)

In [None]:
# Print precision, recall and F1 score of each label.
# classification_report compares class ids, so the per-class scores returned by
# model.predict are collapsed to the index of the highest score first. The
# ndim check keeps the cell working if predict_results already holds class ids.
predicted_scores = np.asarray(predict_results)
predicted_classes = (
    predicted_scores.argmax(axis=1) if predicted_scores.ndim > 1 else predicted_scores
)
sk_report = classification_report(
    y_true=test_labels1, y_pred=predicted_classes, digits=4
)
# print() renders the report's line breaks; echoing the string would show raw '\n'
print(sk_report)

In [None]:
# Confusion matrix of the predicted classes against the true test labels.
# The DataFrame.append / Series.append calls that used to sit here are gone:
# their return values were discarded, so they never added the validation split
# to the evaluation data, and append() is not available in pandas 2.x.
fig, ax = plt.subplots()
predict_results = model.predict(normed_test_data)

# Convert per-class scores into class ids (index of the largest score per row)
predict_results = predict_results.argmax(axis=1)
cm = confusion_matrix(test_labels1, predict_results)
# fmt='d' writes the integer counts in full rather than in the default
# two-significant-digit format
sns.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted labels')
ax.set_ylabel('True labels')
ax.set_title('Confusion Matrix');

In [None]:
# Training loss and validation loss for every epoch, drawn on the same axes
for loss_key in ('loss', 'val_loss'):
    plt.plot(history.history[loss_key])
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Cross-Validation'], loc='upper left')
plt.show()


In [None]:
# Plot training and validation accuracy against epochs.
# pyplot is already available as `plt` from the notebook's import cell, so it
# is not imported a second time here.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['Train', 'Cross-Validation'], loc='upper left')
plt.show()

In [None]:
# Micro-averaged precision of predict_results against the true test labels
precision_score(y_true=test_labels1, y_pred=predict_results, average='micro')

In [None]:
# Micro-averaged F1 score of predict_results against the true test labels
f1_score(y_true=test_labels1, y_pred=predict_results, average='micro')

In [None]:
# Accuracy of predict_results against the true test labels
accuracy_score(y_true=test_labels1, y_pred=predict_results)

In [None]:
# Micro-averaged recall of predict_results against the true test labels
recall_score(y_true=test_labels1, y_pred=predict_results, average='micro')