In [62]:
# Clear all user-defined names so the notebook starts from an empty namespace
%reset -f
# NOTE(review): debugging aid - prints the kernel connection details (ports and
# signing key) into the saved output; consider removing before sharing.
%connect_info

{
  "shell_port": 58339,
  "iopub_port": 36973,
  "stdin_port": 42245,
  "control_port": 42507,
  "hb_port": 60351,
  "ip": "127.0.0.1",
  "key": "e7469cb1-f40cad1b4970659273a9cfde",
  "transport": "tcp",
  "signature_scheme": "hmac-sha256",
  "kernel_name": ""
}

Paste the above JSON into a file, and connect with:
    $> jupyter <app> --existing <file>
or, if you are local, you can connect with just:
    $> jupyter <app> --existing kernel-22de5991-ad8e-4f62-a297-7f6ad2c64de5.json
or even just:
    $> jupyter <app> --existing
if this is the most recent Jupyter kernel you have started.


In [63]:
import numpy as np
import pandas as pd
import sys, os, random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer, Dropout, Conv1D, Conv2D, Flatten, Reshape, MaxPooling1D, MaxPooling2D, BatchNormalization, TimeDistributed
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

In [64]:
## Paths and file names used to load the data and to save the model
# Root folder of the measurement run. Kept as a plain string with a trailing
# slash because later cells build full paths by string concatenation.
path_base = (
    "/home/trix_arch/IIVspace/DataSc/"
    "DataCamp2021/DSR_B28_work/DSR28_portfolio_project/"
    "Measurements_n_Tests/GGS_arduino_readings/GGSv2I_complete/"
)
# Sub-folder (relative to path_base) that holds the CSV data
path_add = "df_data/"
# Sub-folder (relative to path_base) where trained models are written
path_add_folder2savemodel = "tf_models/"
# Name under which the trained model is saved
model_filename = 'model_c5_n36d02n24d02_mk1'

In [65]:
# Assemble the full CSV location from the configured folders, then read it
filename2load = "xdf_20211221_run5.csv"
csv_path = "".join((path_base, path_add, filename2load))
Xdf = pd.read_csv(csv_path)

In [66]:
# Preview the first rows of the loaded frame
Xdf.head()

Unnamed: 0,time_ms,B102NO2,B302C2H5OH,B502VOC,B702CO,TdegC,RH,label,add_label,condition
0,15,549,153,514,318,25,34,beer,1,roomoffice
1,16,548,154,515,318,25,34,beer,1,roomoffice
2,16,549,153,515,319,25,34,beer,1,roomoffice
3,16,549,154,515,318,25,34,beer,1,roomoffice
4,17,549,154,515,319,25,34,beer,1,roomoffice


In [67]:
# Select the model inputs by column name rather than by position, so the
# selection does not silently shift if the CSV gains or loses a leading
# column (for example a saved index column).
feature_columns = ["B102NO2", "B302C2H5OH", "B502VOC", "B702CO", "TdegC", "RH"]
X_init = Xdf.loc[:, feature_columns]
# Text class label of every row
Y_init = Xdf.label

In [68]:
# Distinct class labels in sorted order, and how many of them there are
classes_values = sorted(Xdf.label.unique().tolist())
classes = len(classes_values)
classes_values

['beer', 'coffee', 'orange']

In [69]:
# Map each text label to a 1-based integer code
dict_4labels = dict(beer=1, coffee=2, orange=3)

In [70]:
# Set random seeds for repeatable results
RANDOM_SEED = 3
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# Encode the text labels as the 1-based integer codes from dict_4labels.
# Series.map is used instead of Series.replace: replace leaves any label that
# is missing from the mapping as a string (which only fails later, at the
# subtraction), while map marks it as NaN so it can be reported explicitly.
label_codes = Y_init.map(dict_4labels)
unknown_labels = Y_init[label_codes.isna()].unique().tolist()
if unknown_labels:
    raise ValueError(f"Labels missing from dict_4labels: {unknown_labels}")

# One-hot encode; the codes are shifted by one so they start at 0
Y = tf.keras.utils.to_categorical(label_codes.astype(int) - 1, classes)
X = X_init.to_numpy()

# Hold out 20% of the rows for validation/testing.
# NOTE(review): the split uses its own fixed random_state=1, not RANDOM_SEED.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1)

# Number of feature columns per sample (read from the array shape so it does
# not require at least one training row to exist)
input_length = X_train.shape[1]

# Unbatched tf.data views of the two splits
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
validation_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))

# No training callbacks are registered
callbacks = []


In [71]:
# model architecture: two ReLU hidden layers (36 and 24 units), each with an
# L1 activity regulariser and followed by 20% dropout, then a softmax output
# with one unit per class.
model = Sequential()
model.add(Dense(36, 
                activation='relu', 
                activity_regularizer=tf.keras.regularizers.l1(0.00001)))
model.add(Dropout(0.2))
model.add(Dense(24, 
                activation='relu',
                activity_regularizer=tf.keras.regularizers.l1(0.00001)))
model.add(Dropout(0.2))
model.add(Dense(classes, 
                activation='softmax', 
                name='y_pred'))

# this controls the learning rate
opt = Adam(learning_rate=0.0005, beta_1=0.9, beta_2=0.999)

# this controls the batch size.
# The batched datasets are rebuilt from the arrays rather than derived from
# the previous value of train_dataset / validation_dataset: re-batching an
# already-batched dataset (which happens when this cell is run twice) would
# produce batches of batches and break training.
BATCH_SIZE = 32
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train)).batch(
    BATCH_SIZE, drop_remainder=False)
validation_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test)).batch(
    BATCH_SIZE, drop_remainder=False)

# configure the loss, optimizer and reported metric (training happens in fit)
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])


In [72]:
# Train for 15 epochs on the batched training set, reporting validation
# loss/accuracy alongside the training metrics
model.fit(
    train_dataset,
    epochs=15,
    validation_data=validation_dataset,
    verbose=2,
    callbacks=callbacks,
)

Epoch 1/15
360/360 - 1s - loss: 33.3161 - accuracy: 0.6095 - val_loss: 0.0445 - val_accuracy: 0.9997
Epoch 2/15
360/360 - 0s - loss: 4.8296 - accuracy: 0.8156 - val_loss: 0.0262 - val_accuracy: 1.0000
Epoch 3/15
360/360 - 0s - loss: 1.6314 - accuracy: 0.8893 - val_loss: 0.0227 - val_accuracy: 1.0000
Epoch 4/15
360/360 - 1s - loss: 0.7658 - accuracy: 0.9220 - val_loss: 0.0206 - val_accuracy: 1.0000
Epoch 5/15
360/360 - 1s - loss: 0.4320 - accuracy: 0.9458 - val_loss: 0.0190 - val_accuracy: 1.0000
Epoch 6/15
360/360 - 1s - loss: 0.2779 - accuracy: 0.9577 - val_loss: 0.0181 - val_accuracy: 1.0000
Epoch 7/15
360/360 - 1s - loss: 0.1988 - accuracy: 0.9680 - val_loss: 0.0172 - val_accuracy: 1.0000
Epoch 8/15
360/360 - 1s - loss: 0.1427 - accuracy: 0.9745 - val_loss: 0.0163 - val_accuracy: 1.0000
Epoch 9/15
360/360 - 1s - loss: 0.0969 - accuracy: 0.9799 - val_loss: 0.0157 - val_accuracy: 1.0000
Epoch 10/15
360/360 - 1s - loss: 0.0861 - accuracy: 0.9829 - val_loss: 0.0151 - val_accuracy: 1.000

<keras.callbacks.History at 0x7f130d75ae50>

In [73]:
# Print the layer-by-layer structure and parameter counts of the trained model
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 36)                252       
_________________________________________________________________
dropout_6 (Dropout)          (None, 36)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 24)                888       
_________________________________________________________________
dropout_7 (Dropout)          (None, 24)                0         
_________________________________________________________________
y_pred (Dense)               (None, 3)                 75        
Total params: 1,215
Trainable params: 1,215
Non-trainable params: 0
_________________________________________________________________


In [74]:
# Score the trained model on the held-out split and report accuracy in percent
loss, acc = model.evaluate(X_test, Y_test, verbose=2)
print(f"Model, accuracy: {100 * acc:5.2f}%")

90/90 - 0s - loss: 0.0122 - accuracy: 1.0000
Model, accuracy: 100.00%


In [75]:
# Save the model to disk, under <path_base>/<model folder>/<model name>
saved_model_path = "".join((path_base, path_add_folder2savemodel, model_filename))
model.save(saved_model_path)

INFO:tensorflow:Assets written to: /home/trix_arch/IIVspace/DataSc/DataCamp2021/DSR_B28_work/DSR28_portfolio_project/Measurements_n_Tests/GGS_arduino_readings/GGSv2I_complete/tf_models/model_c5_n36d02n24d02_mk1/assets


In [76]:
#########################
## Load into a new model
#########################
# Read the model back from the same location it was saved to
restored_model_path = "".join((path_base, path_add_folder2savemodel, model_filename))
new_model = tf.keras.models.load_model(restored_model_path)

# Check its architecture
new_model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 36)                252       
_________________________________________________________________
dropout_6 (Dropout)          (None, 36)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 24)                888       
_________________________________________________________________
dropout_7 (Dropout)          (None, 24)                0         
_________________________________________________________________
y_pred (Dense)               (None, 3)                 75        
Total params: 1,215
Trainable params: 1,215
Non-trainable params: 0
_________________________________________________________________


In [77]:
# Score the reloaded model on the same held-out split
loss, acc = new_model.evaluate(X_test, Y_test, verbose=2)
print(f"Restored model, accuracy: {100 * acc:5.2f}%")

# Shape of the prediction matrix: one row per test sample, one column per class
restored_predictions = new_model.predict(X_test)
print(restored_predictions.shape)

90/90 - 0s - loss: 0.0122 - accuracy: 1.0000
Restored model, accuracy: 100.00%
(2880, 3)


In [78]:
#########################
##### tests
#########################

In [79]:
##### random sample from TRAIN set
randt_index_X = np.random.randint(low=0, high=X_train.shape[0], size=1, dtype=int)
sample_values = X_train[randt_index_X, :]
sample_onehot = Y_train[randt_index_X]
# Boolean mask over the class list: True where the one-hot row is non-zero
class_mask = [bool(flag) for flag in sample_onehot.tolist()[0]]
label_text = pd.DataFrame(classes_values).loc[class_mask, :].values
print(f"Values_Train: {sample_values}")
print(f"Label_Train:  {sample_onehot}")
print(f"Label_Text:   {label_text}")

Values_Train: [[728 642 540 210  25  34]]
Label_Train:  [[0. 1. 0.]]
Label_Text:   [['coffee']]


In [80]:
##### random sample from TEST set
randt_index_X = np.random.randint(low=0, high=X_test.shape[0], size=1, dtype=int)
sample_values = X_test[randt_index_X, :]
sample_onehot = Y_test[randt_index_X]
# Boolean mask over the class list: True where the one-hot row is non-zero
class_mask = [bool(flag) for flag in sample_onehot.tolist()[0]]
label_text = pd.DataFrame(classes_values).loc[class_mask, :].values
print(f"Values_Test: {sample_values}")
print(f"Label_Test:  {sample_onehot}")
print(f"Label_Text:   {label_text}")

Values_Test: [[548 153 514 318  25  34]]
Label_Test:  [[1. 0. 0.]]
Label_Text:   [['beer']]


In [83]:
##### random sample from TEST set
# NOTE(review): same code as the previous cell, re-run to draw another sample
randt_index_X = np.random.randint(low=0, high=X_test.shape[0], size=1, dtype=int)
sample_values = X_test[randt_index_X, :]
sample_onehot = Y_test[randt_index_X]
# Boolean mask over the class list: True where the one-hot row is non-zero
class_mask = [bool(flag) for flag in sample_onehot.tolist()[0]]
label_text = pd.DataFrame(classes_values).loc[class_mask, :].values
print(f"Values_Test: {sample_values}")
print(f"Label_Test:  {sample_onehot}")
print(f"Label_Text:   {label_text}")

Values_Test: [[900 372 804 359  26  33]]
Label_Test:  [[0. 0. 1.]]
Label_Text:   [['orange']]


In [81]:
# Draw one random row position from the full (unsplit) data.
# The draw is a position in [0, number of rows), so the row is selected with
# the positional indexer .iloc; the label-based .loc only returns the same row
# while the frame still has its default 0..n-1 index.
randt_index_X = np.random.randint(low=0, high=X_init.shape[0], size=1, dtype=int)

print(f"X sample: {X_init.iloc[randt_index_X, :]} \nY label: {Y_init.iloc[randt_index_X]}")

X sample:        B102NO2  B302C2H5OH  B502VOC  B702CO  TdegC  RH
11513      920         427      840     449     25  34 
Y label: 11513    orange
Name: label, dtype: object
