In [88]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation, Dense, BatchNormalization, Dropout
from tensorflow.keras import optimizers
import matplotlib.pyplot as plt 
import numpy as np 
import pandas as pd 
import seaborn as sns 
from datetime import datetime
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report, f1_score

In [89]:
# Seed every random generator this notebook draws from so a fresh
# "Restart & Run All" repeats the same run: NumPy's global generator is what
# scikit-learn's train_test_split falls back to when no random_state is
# passed, and TensorFlow's global seed drives the weight initialisation.
SEED = 13
np.random.seed(SEED)
tf.random.set_seed(SEED)
# Do not log the device each op is placed on.
tf.debugging.set_log_device_placement(False)

In [90]:
#LOCAL
# Both CSV files sit under the same data folder; spell the shared root once.
DATA_DIR = "/content/drive/MyDrive/Colab Notebooks/Second semester/Data"
train_path = DATA_DIR + "/label_exclude/device1.csv"
test_path = DATA_DIR + "/random_split/test.csv"

In [91]:
#LOCAL
# Load the training file and the test file into DataFrames.
train_ds, test_ds = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

In [None]:
# Remove the 'Unnamed: *' columns from both frames (presumably index columns
# written out by earlier to_csv calls -- confirm against the data prep step).
# drop(..., errors='ignore') is used instead of pop() so the cell can be
# re-run: pop() raises KeyError once the column has already been removed.
stale_columns = ['Unnamed: 0', 'Unnamed: 0.1']
test_ds = test_ds.drop(columns=stale_columns, errors='ignore')
train_ds = train_ds.drop(columns=stale_columns, errors='ignore')

In [None]:
# The training file carries one more 'Unnamed' column than the test file.
# drop(..., errors='ignore') keeps the cell re-runnable, whereas pop() raises
# KeyError on the second execution.
train_ds = train_ds.drop(columns=['Unnamed: 0.1.1'], errors='ignore')

In [None]:
# Show the test frame to check which columns remain.
test_ds

In [None]:
# Show the training frame to check which columns remain.
train_ds

In [96]:
# Split the loaded test file 50/50 into the final test set and a validation
# set. The training set comes from its own file and is not touched here.
# random_state pins the shuffle so both halves, and every metric computed on
# them, are the same on each run.
# NOTE: test_ds is overwritten, so re-running only this cell halves it again;
# reload the CSV before re-executing.
test_ds, valid_ds = train_test_split(test_ds, test_size=0.5, random_state=13)

In [97]:
#LOCAL
# Rows per class in the test split; NaN labels would be counted as their own row.
test_class_counts = test_ds['subcategory'].value_counts(dropna=False)
test_class_counts

5    103213
3     97870
2     94909
6     61344
8      6508
7      1784
4       145
1       119
0        47
Name: subcategory, dtype: int64

In [98]:
# Rows per class in the training data; NaN labels would be counted as their own row.
train_class_counts = train_ds['subcategory'].value_counts(dropna=False)
train_class_counts

5    206754
3    195323
2    189397
6    123150
8     13049
7      3591
4       301
Name: subcategory, dtype: int64

In [99]:
# Rows per class in the validation split; NaN labels would be counted as their own row.
valid_class_counts = valid_ds['subcategory'].value_counts(dropna=False)
valid_class_counts

5    103403
3     97709
2     94774
6     61638
8      6338
7      1792
4       140
1       106
0        40
Name: subcategory, dtype: int64

In [100]:
# Summary statistics of the training features (std, min, max and the
# 25th/50th/75th percentiles among them), one row per feature. The label
# column is excluded because it must not be normalised.
train_stats = train_ds.describe().drop(columns="subcategory").transpose()

In [101]:
# Detach the target column from every split: pop() hands back the labels and
# leaves only the feature columns in the frame.
train_labels1, test_labels1, valid_labels1 = (
    split.pop('subcategory') for split in (train_ds, test_ds, valid_ds)
)

In [102]:
# One-hot encode the class labels; each resulting column is named Label_<class>.
# A split only gets columns for the classes that actually occur in it.
def _to_one_hot(labels):
    """Return the one-hot DataFrame of `labels` with 'Label'-prefixed columns."""
    return pd.get_dummies(labels, prefix='Label')

train_labels = _to_one_hot(train_labels1)
valid_labels = _to_one_hot(valid_labels1)
test_labels = _to_one_hot(test_labels1)

In [103]:
#Device split
# The device-specific training data does not contain every class, so its
# one-hot frame has fewer columns than the 9 the later shape check requires.
# Add each absent Label_<k> column as all zeros at its sorted position.
# The membership guard keeps the cell re-runnable (an unconditional
# DataFrame.insert raises ValueError when the column already exists) and
# removes the need to hand-edit the list of absent classes per device.
NUM_CLASSES = 9
for class_id in range(NUM_CLASSES):
    label_column = f'Label_{class_id}'
    if label_column not in train_labels.columns:
        train_labels.insert(class_id, label_column, 0)
train_labels

Unnamed: 0,Label_0,Label_1,Label_2,Label_3,Label_4,Label_5,Label_6,Label_7,Label_8
0,0,0,0,1,0,0,0,0,0
1,0,0,0,1,0,0,0,0,0
2,0,0,0,1,0,0,0,0,0
3,0,0,0,0,0,0,0,1,0
4,0,0,0,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...
731560,0,0,0,0,0,1,0,0,0
731561,0,0,1,0,0,0,0,0,0
731562,0,0,1,0,0,0,0,0,0
731563,0,0,0,0,0,0,1,0,0


In [104]:
# Sanity check: report, per split, whether its one-hot frame has all 9 label columns.
y_list = [train_labels, test_labels, valid_labels]
for data in y_list:
    has_all_labels = data.shape[1] == 9
    print('one hot encoded successfully' if has_all_labels else 'missing label')

one hot encoded successfully
one hot encoded successfully
one hot encoded successfully


In [105]:
#normalise the x data
def norm(x):
    """Min-max scale the columns of `x` with the training set's statistics.

    Each column is mapped with (x - min) / (max - min), where min and max are
    read from the global `train_stats` table (one row per feature). The
    training minimum therefore maps to 0 and the training maximum to 1;
    values outside the training range fall outside [0, 1].

    A feature that is constant in the training data has max == min. Its range
    is replaced by 1 so the column becomes `x - min` rather than the NaN/inf
    produced by dividing by zero.
    """
    lower = train_stats['min']
    value_range = train_stats['max'] - lower
    # Keep non-zero ranges as they are; substitute 1 where the range is exactly 0.
    value_range = value_range.where(value_range != 0, 1)
    return (x - lower) / value_range
# Scale all three splits with the same (training-derived) statistics.
normed_train_data, normed_test_data, normed_valid_dataset = map(
    norm, (train_ds, test_ds, valid_ds)
)

In [None]:
#train two layer neural network
start = datetime.now()
def build_model1_two_hidden_layers(hidden_activation='relu'):
    """Build and compile the dense softmax classifier.

    Architecture: one hidden Dense layer of 16 units followed by a softmax
    output layer with one unit per one-hot label column. (Despite the
    function name, there is a single hidden layer.)

    Without an activation the hidden layer is purely linear, and a linear
    layer followed by the output layer collapses to plain softmax
    regression; hence the non-linear default.

    Args:
        hidden_activation: Activation of the hidden layer. Pass None for a
            linear hidden layer.

    Returns:
        A compiled Sequential model (Adam with learning rate 1e-4,
        categorical cross-entropy loss, accuracy metric).

    Note:
        Reads the globals `normed_train_data` (for the input width) and
        `train_labels` (for the number of output units).
    """
    model = Sequential()
    model.add(Dense(16, activation=hidden_activation,
                    input_shape=(normed_train_data.shape[1],)))
    model.add(Dense(train_labels.shape[1], activation='softmax'))
    learning_rate = 0.0001
    optimizer = optimizers.Adam(learning_rate)
    # The output layer already applies softmax, so the loss receives
    # probabilities rather than logits.
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    return model


# Training hyper-parameters: number of passes over the data and mini-batch size.
EPOCHS, batch_size = 10, 16

# Instantiate the network and print its layer/parameter overview.
model = build_model1_two_hidden_layers()
print('Here is a summary of this model: ')
model.summary()


# An epoch is counted in full batches only; a trailing partial batch is not included.
steps_per_epoch = normed_train_data.shape[0] // batch_size

# Training is pinned to the CPU.
with tf.device('/CPU:0'):
    history = model.fit(
        x=normed_train_data,
        y=train_labels,
        validation_data=(normed_valid_dataset, valid_labels),
        epochs=EPOCHS,
        batch_size=batch_size,
        steps_per_epoch=steps_per_epoch,
        shuffle=True,
        verbose=1,
    )
# Stop the wall-clock timer that was started in `start`.
end = datetime.now()

Here is a summary of this model: 
Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_4 (Dense)             (None, 16)                608       
                                                                 
 dense_5 (Dense)             (None, 9)                 153       
                                                                 
Total params: 761
Trainable params: 761
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10

In [None]:
# Destination directory for the trained model.
savemodel = (
    '/content/drive/MyDrive/Colab Notebooks/Second semester'
    '/Models/NN/local/exclude/device1'
)

In [None]:
# Persist the trained model (architecture and weights) at `savemodel`.
keras.models.save_model(model, filepath=savemodel)

In [85]:
#model=tf.keras.models.load_model('/content/drive/MyDrive/Colab Notebooks/Second semester/Models/NN/local/exclude/device2')

In [None]:
# Wall-clock duration between the `start` and `end` timestamps.
training_time = end - start
print(training_time)

In [None]:
# Per-class scores for every test row, then the index of the highest score
# as the predicted class id.
class_scores = model.predict(normed_test_data)
predict_results = class_scores.argmax(axis=1)

In [None]:
#print precision, recall and f1 score of each label
sk_report = classification_report(y_true=test_labels1, y_pred=predict_results, digits=4)
# print() renders the report's line breaks; echoing the bare string as the
# cell's last expression shows its repr, with literal "\n" escapes on one line.
print(sk_report)