In [1]:
import sys
import os
import pandas as pd
import numpy as np
import tensorflow as tf
# import matplotlib.pyplot as plt
import json
from datetime import datetime

In [2]:
# Record the TensorFlow build in the notebook output so results can be traced
# back to the library version that produced them.
print(tf.__version__)

2.18.0-dev20240611


### Specify here the NN topology and the dataframe to be loaded

In [3]:
# Network topology. The input layer has one node per selected attribute,
# i.e. input_layer_nodes == number_of_attributes.
number_of_attributes = 16
hidden_layer_nodes = 32
output_layer_nodes = 2

# Dataset selection. `dataset_folder` is derived from `use_case`, and the
# feature-ranking file is derived from `dataset_folder`, so the ranking file
# always comes from the same folder as the data it describes.
use_case = "netml-iot"
dataset_folder = f"../datasets/nprint-raw/{use_case}/"
features_rankings = f"{dataset_folder}feature-importance.csv"



### Define the model

In [4]:
# Training hyper-parameters used by the fit / evaluate calls.
BATCH_SIZE = 512
EPOCHS = 500

def get_basic_model(norm_layer=None, hidden_units=None, output_units=None):
  """Build and compile the single-hidden-layer classifier.

  Architecture: norm_layer -> Dense(hidden_units, relu) -> Dense(output_units).
  The output layer has no activation, so the model emits raw logits; the loss
  is configured with from_logits=True to match.

  Args:
    norm_layer: Layer placed first in the model. Defaults to the notebook-level
      `normalizer`, which must already be defined when this function is called.
    hidden_units: Width of the hidden layer. Defaults to `hidden_layer_nodes`.
    output_units: Number of output logits. Defaults to `output_layer_nodes`.

  Returns:
    A compiled tf.keras.Sequential model (Adam optimizer, sparse categorical
    cross-entropy on logits, accuracy metric).
  """
  # Defaults are resolved at call time because the globals live in other cells
  # and may not exist yet when this cell is executed.
  if norm_layer is None:
    norm_layer = normalizer
  if hidden_units is None:
    hidden_units = hidden_layer_nodes
  if output_units is None:
    output_units = output_layer_nodes

  model = tf.keras.Sequential([
    norm_layer,
    tf.keras.layers.Dense(hidden_units, activation='relu'),
    tf.keras.layers.Dense(output_units)
  ])

  model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
  return model

### Import the data

In [5]:
# Feature tables for the training and validation splits. The 'pcap' column is
# discarded as soon as each file is read.
df_train = pd.read_csv(f'{dataset_folder}X.csv').drop(columns='pcap')
df_test = pd.read_csv(f'{dataset_folder}X_val.csv').drop(columns='pcap')

# Label tables: same treatment, plus column '0' is renamed to 'label'.
label_train = (
    pd.read_csv(f'{dataset_folder}y.csv')
    .drop(columns='pcap')
    .rename(columns={'0': 'label'})
)
label_test = (
    pd.read_csv(f'{dataset_folder}y_val.csv')
    .drop(columns='pcap')
    .rename(columns={'0': 'label'})
)

# Uncomment to inspect the loaded frames:
# display(df_train, df_test, label_train, label_test)

In [6]:
# Reduce both splits to the best-ranked attributes listed in the feature
# importance file (rankings come from the AutoGluon feature importance
# algorithm).
features_rankings_df = pd.read_csv(features_rankings).rename(columns={'0': 'label'})

# The attribute names sit in the CSV's first, header-less column, which
# read_csv exposes as 'Unnamed: 0'. Keep the first `number_of_attributes`
# entries in file order (assumes the file is sorted best-first - TODO confirm).
feature_list = features_rankings_df['Unnamed: 0'].iloc[:number_of_attributes].tolist()

# Keep only those columns, in ranking order, for training and validation data.
df_train, df_test = df_train[feature_list], df_test[feature_list]

# Uncomment to inspect the selection:
# print(feature_list)
# display(df_train, df_test)

### NN training

In [7]:
# Begin NN training
NUMBER_OF_ATTRS = df_train.shape[1]

# Cast both splits to float32, the floating-point dtype fed to the network.
df_train = np.asarray(df_train).astype(np.float32)
df_test = np.asarray(df_test).astype(np.float32)
# Turn the training array into a TensorFlow tensor.
df_train = tf.convert_to_tensor(df_train)

# Fit the normalization statistics on the training split only, so the test
# split does not influence them.
normalizer = tf.keras.layers.Normalization()
normalizer.adapt(df_train)

# Train. get_basic_model() picks up the `normalizer` defined just above.
# NOTE(review): no random seed is set in this cell, so weights and the
# resulting accuracy may differ from run to run.
print('Starting training for',number_of_attributes,'attributes')
model = get_basic_model()
model.fit(df_train, label_train, epochs=EPOCHS, verbose=2, batch_size=BATCH_SIZE)

# Evaluate on the held-out split.
test_loss, one_test_acc = model.evaluate(df_test, label_test, verbose=1, batch_size=BATCH_SIZE)

# Despite the name, these are raw logits rather than probabilities: the loss is
# built with from_logits=True and the model's last layer has no activation.
# The variable name is kept for compatibility.
tf_predictions_probabilities = model.predict(df_test)

# Predicted class for each sample = index of its largest output.
tf_predictions = np.argmax(tf_predictions_probabilities, axis=1)

# Confusion matrix: rows are the true labels, columns the predicted labels.
conf_m = tf.math.confusion_matrix(label_test,tf_predictions)
print(conf_m)

Starting training for 16 attributes
Epoch 1/500
24/24 - 0s - 21ms/step - accuracy: 0.7357 - loss: 0.5018
Epoch 2/500
24/24 - 0s - 1ms/step - accuracy: 0.8634 - loss: 0.3451
Epoch 3/500
24/24 - 0s - 1ms/step - accuracy: 0.9495 - loss: 0.2507
Epoch 4/500
24/24 - 0s - 1ms/step - accuracy: 0.9895 - loss: 0.1868
Epoch 5/500
24/24 - 0s - 1ms/step - accuracy: 0.9901 - loss: 0.1429
Epoch 6/500
24/24 - 0s - 1ms/step - accuracy: 0.9900 - loss: 0.1120
Epoch 7/500
24/24 - 0s - 1ms/step - accuracy: 0.9907 - loss: 0.0897
Epoch 8/500
24/24 - 0s - 1ms/step - accuracy: 0.9912 - loss: 0.0737
Epoch 9/500
24/24 - 0s - 1ms/step - accuracy: 0.9913 - loss: 0.0619
Epoch 10/500
24/24 - 0s - 1ms/step - accuracy: 0.9917 - loss: 0.0530
Epoch 11/500
24/24 - 0s - 1ms/step - accuracy: 0.9921 - loss: 0.0464
Epoch 12/500
24/24 - 0s - 1ms/step - accuracy: 0.9920 - loss: 0.0412
Epoch 13/500
24/24 - 0s - 1ms/step - accuracy: 0.9925 - loss: 0.0372
Epoch 14/500
24/24 - 0s - 1ms/step - accuracy: 0.9938 - loss: 0.0341
Epoch 

In [8]:
# Model info
model.summary()

# Layer 0 is the Normalization layer. It has no kernel/bias: get_weights()
# holds the per-feature mean at index 0 and the per-feature variance at
# index 1, both computed by adapt(). The variable names are kept unchanged for
# any code that refers to them; only the printed labels are corrected.
norm_layer_params = model.layers[0].get_weights()
norm_layer_weights = norm_layer_params[0]  # mean
norm_layer_biases = norm_layer_params[1]   # variance

# Dense layers store exactly [kernel, bias].
hidden_layer_weights, hidden_layer_biases = model.layers[1].get_weights()
out_layer_weights, out_layer_biases = model.layers[2].get_weights()

print('\nInput -> Normalization Layer variance:\n',norm_layer_biases)
print('\nInput -> Normalization Layer mean:\n',norm_layer_weights)
print('\nNormalization -> Hidden Layer bias:\n',hidden_layer_biases)
print('\nNormalization -> Hidden Layer weights:\n',hidden_layer_weights)
print('\nHidden -> Output Layer bias:\n',out_layer_biases)
print('\nHidden -> Output Layer weights:\n',out_layer_weights,'\n')


Input -> Normalization Layer bias:
 [0.26042014 0.2591248  0.06433415 0.05689694 0.26671037 0.5895072
 0.03620648 0.26985016 0.0080265  0.1776892  0.68416846 0.9210642
 0.40731376 0.4540065  0.36856326 0.25212252]

Input -> Normalization Layer weights:
 [ 0.57587385  0.5840494   0.05072162  0.06056561 -0.15416701 -0.45499292
  0.02043881 -0.60256946 -0.0080921   0.7689163  -0.3951781   0.19404355
 -0.27921915 -0.2487695  -0.7941103  -0.6105781 ]

Normalization -> Hidden Layer bias:
 [ 0.16697688  0.11481686  0.31152403  0.09247332  0.03269651  0.3330602
 -0.1946107  -0.07160804 -0.03138632  0.20187598 -0.21255657  0.1747336
  0.02342194  0.69106007  0.07127851 -0.03114577  0.16166943 -0.03466644
  0.5816025  -0.03995842 -0.07570551  0.07546188 -0.10182013  0.11338563
  0.32303998 -0.07530031 -0.01300398  0.16719915  0.67625046  0.1945209
 -0.18082404 -0.10174499]

Normalization -> Hidden Layer weights:
 [[ 0.4242589   0.16685    -0.26416257 -0.0513381   0.18221745 -0.2269297
   0.5243

In [9]:
# Timestamp of this run (day-month-year-hour-minute-second); stored in the
# report under the "datetime" key.
now = datetime.now()
dt_string = now.strftime("%d-%m-%Y-%H-%M-%S")
print(dt_string)

# Run summary: network topology, training hyper-parameters and the accuracy
# measured on the test split.
report = dict(
    datetime=dt_string,
    number_of_attributes=number_of_attributes,
    hidden_layer_nodes=hidden_layer_nodes,
    output_layer_nodes=output_layer_nodes,
    accuracy_test=one_test_acc,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    # Not recorded at the moment:
    # number_of_samples=number_of_samples,
    # df_number=df_number,
)

12-07-2024-16-46-44


In [10]:
# Persist the run report (JSON) and the full model (model.save, .keras format).

# Output directories, relative to the notebook's working directory. Files are
# written through explicit paths instead of os.chdir(), so running this cell
# does not change where the relative paths used by other cells resolve to.
reports_dir = os.path.join('..', 'tf-params-reports')
models_dir = os.path.join('..', 'tf-models')

# Report files. The directory is created on first use.
os.makedirs(reports_dir, exist_ok=True)
print(os.path.abspath(reports_dir))

# Fixed-name copy: always holds the report of the most recent run.
title_parameters_save = f"nn-nprint-{use_case}-model-parameters-{number_of_attributes}x{hidden_layer_nodes}x{output_layer_nodes}.json"
with open(os.path.join(reports_dir, title_parameters_save), "w") as f:
  json.dump(report, f)

# Timestamped copy: prevents unwanted/accidental overwrites of earlier runs.
title_parameters_save = f"nn-nprint-{use_case}-model-parameters-{number_of_attributes}x{hidden_layer_nodes}x{output_layer_nodes}-{dt_string}.json"
with open(os.path.join(reports_dir, title_parameters_save), "w") as f:
  json.dump(report, f)

# Model file with weights and other params.
os.makedirs(models_dir, exist_ok=True)
print(os.path.abspath(models_dir))

title_model_save = f'nn-nprint-{use_case}-model-{number_of_attributes}x{hidden_layer_nodes}x{output_layer_nodes}.keras'
model.save(os.path.join(models_dir, title_model_save))

# Model weights file generation
# os.chdir('../tf-model-weights/')
# print(os.getcwd())

# title_model_save = f'nn-nprint-model-weights-{number_of_attributes}x{hidden_layer_nodes}x{output_layer_nodes}.weights.h5'
# model.save_weights(title_model_save) # Calling `save('my_model')` creates a SavedModel folder `my_model`.

# title_model_save = f'nn-nprint-model-weights-{number_of_attributes}x{hidden_layer_nodes}x{output_layer_nodes}-{dt_string}.weights.h5'
# model.save_weights(title_model_save) # Calling `save('my_model')` creates a SavedModel folder `my_model`.
    
# model.load_weights(f'nn-nprint-app-iden-model-weights-{number_of_attributes}x{hidden_layer_nodes}x{output_layer_nodes}.weights.h5')

C:\NN-P4\tf-params-reports
C:\NN-P4\tf-models


In [11]:
# Archive - useful Python commands
# %pwd
# os.chdir(C:\NN-P4\nn-reports)
# cwd = os.getcwd()
# print(cwd)
# os.listdir()
# os.chdir('../nn-reports/')

In [12]:
# df_train = pd.read_pickle('/content/drive/MyDrive/nprint/nn-reproduction/app-iden/X.pkl')
# df_test = pd.read_pickle('/content/drive/MyDrive/nprint/nn-reproduction/app-iden/X_val.pkl')
# label_train = pd.read_pickle('/content/drive/MyDrive/nprint/nn-reproduction/app-iden/y.pkl')
# label_test = pd.read_pickle('/content/drive/MyDrive/nprint/nn-reproduction/app-iden/y_val.pkl')