In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import datetime
import json
import numpy as np
import os
import pandas as pd
import sys 
import tensorflow as tf
import time
import matplotlib.pyplot as plt

from keras.callbacks import History 
history = History()

BATCH_SIZE = 10

number_of_attributes = 35

print(tf.__version__)

2.8.0


In [3]:
path_training ="/content/drive/MyDrive/network-traffic-classification-main/csv-files/01_Atributos_Weve.csv"
path_test ="/content/drive/MyDrive/network-traffic-classification-main/csv-files/02_Atributos_Weve.csv"
df = pd.read_csv(path_training)
df_test = pd.read_csv(path_test)

target = df.pop('target')
server_port = df.pop('ServerPort')
client_port = df.pop('ClientPort')

target_test = df_test.pop('target')
server_port_test = df_test.pop('ServerPort')
client_port = df_test.pop('ClientPort')

attributes_array = ['10','15','17','22','24','29','31','32','33','34','36','37','38','45','46','47','48','59','60','61','62','63','64','75','76','77','78','79','80','81','82','83','84','87','88','89','90','91','92']                    

In [4]:
classes = ['WWW','MAIL','FTP-CONTROL','FTP-PASV','ATTACK','P2P','DATABASE','FTP-DATA','MULTIMEDIA','SERVICES','INTERACTIVE','GAMES']
#Converts the above list of classes to an array of index
target = target.apply(lambda x: classes.index(x))
target_test = target_test.apply(lambda x: classes.index(x))

In [5]:
def get_basic_model():
  model = tf.keras.Sequential([
    normalizer,
    tf.keras.layers.Dense(10, activation='relu'), 
    tf.keras.layers.Dense(12)
  ])

  model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
  return model

In [6]:
# Combine the data with possible classes
numeric_feature_names = attributes_array[0:number_of_attributes]
numeric_features = df[numeric_feature_names] # Data for training
numeric_features_test = df_test[numeric_feature_names] # Data for testing

# Converts pandas dataframe to tensorflow object
numeric_features = tf.convert_to_tensor(numeric_features)
#numeric_features_test = tf.convert_to_tensor(numeric_features_test) botei na célula abaixo

# Normalize the data
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(numeric_features)
#normalizer.adapt(numeric_features_test)

# Execute Trainning and Testing
#model = get_basic_model()
print('Starting training for',number_of_attributes,'attributes')
train_accuracy = []
test_acc = []

model = get_basic_model()
history = model.fit(numeric_features, target, epochs=10, verbose=0, batch_size=BATCH_SIZE, callbacks=[history])
one_train_accuracy = history.history['accuracy']

test_loss, one_test_acc =  model.evaluate(numeric_features_test,  target_test, verbose=2, batch_size=BATCH_SIZE)
train_accuracy.append(one_train_accuracy[-1])
test_acc.append(one_test_acc)

train_accuracy_mean = sum(train_accuracy) / len(train_accuracy)
test_acc_mean = sum(test_acc) / len(test_acc)


Starting training for 35 attributes
2381/2381 - 3s - loss: 0.1594 - accuracy: 0.9547 - 3s/epoch - 1ms/step


In [7]:
# Feature Importance using crescent connection weights

# #layer0_weights = model.layers[0].get_weights()[0] #weight
# #layer0_biases  = model.layers[0].get_weights()[1] #bias
layer1_weights = model.layers[1].get_weights()[0] #weight
# #layer1_biases  = model.layers[1].get_weights()[1] #bias
print('Layer 1 weights:',layer1_weights)
# # print('\n')

weight_attribute = []
for attr_i, weights in enumerate(layer1_weights):
  weight_attribute.append((weights.sum(), numeric_feature_names[attr_i]))

weight_attribute = sorted(weight_attribute)
print('\n',weight_attribute)

attributes = [x for _, x in weight_attribute]
print('\n',attributes,'\n')

# Date for report file
current_date_and_time = datetime.date.today()
current_date_and_time_string = str(current_date_and_time)
# Report file content
attr_report = {
    "datetime": current_date_and_time_string,
    "sorted_attributes_by_weights": attributes
}
# Writes into the file
attr_title = "/content/drive/MyDrive/network-traffic-classification-main/nn-attribute-arrays/%s"%current_date_and_time_string
with open(attr_title, "w") as f:
  json.dump(attr_report , f)
with open("/content/drive/MyDrive/network-traffic-classification-main/nn-attribute-arrays/last-run", "w") as f:
  json.dump(attr_report , f)
with open(attr_title, "r") as f:
  rep = json.load(f)
  display(rep)  


Layer 1 weights: [[-6.04933023e-01 -7.21737370e-02  6.55397177e-01 -4.87366199e-01
  -4.70467001e-01  4.29729521e-01 -3.54758918e-01 -2.50419319e-01
  -4.01023030e-01  1.66498885e-01]
 [ 1.67738497e-01 -2.86763459e-01  2.61035711e-01 -2.01755628e-01
  -4.75712240e-01 -6.41935706e-01  4.92600590e-01 -2.81289786e-01
  -6.84118792e-02  3.74361537e-02]
 [-6.19547367e-01 -3.30595039e-02  6.47605240e-01 -3.14886063e-01
  -1.61177218e-01  2.34336317e-01  1.11175170e-02 -2.08516017e-01
   2.58254200e-01  8.75284746e-02]
 [-1.13349788e-01 -4.24508989e-01 -5.65196462e-02  1.61544263e-01
  -1.08730204e-01 -1.88301027e-01 -1.68350950e-01 -6.12193704e-01
   4.40065153e-02  2.06902802e-01]
 [-1.01059206e-01 -1.49129462e+00 -8.58264625e-01 -3.80836911e-02
   3.13938893e-02  3.04125100e-01  6.16348982e-02 -2.13239454e-02
  -6.26278371e-02  3.53288144e-01]
 [ 7.08127141e-01  9.44043100e-02 -1.81420654e-01  2.60761768e-01
  -1.26099709e-04 -2.48778444e-02  1.64445177e-01  3.91582727e-01
  -3.28209877e-0

{'datetime': '2022-03-02',
 'sorted_attributes_by_weights': ['59',
  '60',
  '37',
  '24',
  '36',
  '38',
  '10',
  '87',
  '22',
  '82',
  '81',
  '15',
  '48',
  '33',
  '62',
  '47',
  '45',
  '84',
  '46',
  '64',
  '80',
  '34',
  '32',
  '17',
  '78',
  '76',
  '31',
  '77',
  '79',
  '61',
  '75',
  '29',
  '63',
  '88',
  '83']}