In [None]:
# Mount Google Drive into the Colab filesystem; the CSV paths read later in
# this notebook live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import datetime
import json
import numpy as np
import os
import pandas as pd
import sys 
import tensorflow as tf
import time
import matplotlib.pyplot as plt

from keras.callbacks import History 
# Keras History callback instance. It is passed to model.fit() in the training
# cell, where the name `history` is then rebound to fit()'s return value.
history = History()

# Batch size handed to the model.fit() and model.evaluate() calls below.
BATCH_SIZE = 10
# Number of leading entries of attributes_array used as model input columns.
number_of_attributes = 35

# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

2.7.0


In [None]:
# Locations of the training and test CSV files on the mounted Drive.
path_training ="/content/drive/MyDrive/csv-files/01_Atributos_Weve.csv"
path_test ="/content/drive/MyDrive/csv-files/02_Atributos_Weve.csv"
df_raw = pd.read_csv(path_training)
df_test_raw = pd.read_csv(path_test)

# DataFrame.pop removes the column from the frame in place and returns it, so
# after this the raw frames no longer contain the label or the port columns.
target = df_raw.pop('target')
server_port = df_raw.pop('ServerPort')
client_port = df_raw.pop('ClientPort')

target_test = df_test_raw.pop('target')
server_port_test = df_test_raw.pop('ServerPort')
# Stored under its own name: the original cell assigned this to `client_port`,
# which silently replaced the training-split column popped above.
client_port_test = df_test_raw.pop('ClientPort')

# Candidate feature columns (CSV header names, kept as strings). Later cells
# use only the first `number_of_attributes` entries, in this order.
attributes_array = [
    '10', '15', '17', '22', '24', '29', '31', '32', '33', '34',
    '36', '37', '38', '45', '46', '47', '48', '59', '60', '61',
    '62', '63', '64', '79', '80', '81', '82', '83', '84', '87',
    '88', '89', '90', '91', '92', '75', '76', '77', '78',
]

In [None]:
# Fixed label vocabulary: a label's position in this list is its integer class id.
classes = [
    'WWW',
    'MAIL',
    'FTP-CONTROL',
    'FTP-PASV',
    'ATTACK',
    'P2P',
    'DATABASE',
    'FTP-DATA',
    'MULTIMEDIA',
    'SERVICES',
    'INTERACTIVE',
    'GAMES',
]

# Replace every string label in both splits with its index in `classes`.
# list.index raises ValueError for a value that is not in the list, so this
# cell cannot be re-run on already-encoded targets without reloading the data.
target = target.apply(classes.index)
target_test = target_test.apply(classes.index)


In [None]:
def get_basic_model(normalization_layer=None, num_classes=None, hidden_units=10):
  """Build and compile the small feed-forward classifier used in this notebook.

  The network is: normalization layer -> Dense(hidden_units, relu) ->
  Dense(num_classes) producing raw logits. It is compiled with the Adam
  optimizer, sparse categorical cross-entropy on logits, and an accuracy metric.

  Args:
    normalization_layer: Layer placed first in the model. When None, the
      module-level `normalizer` is used, so it must already exist (and be
      adapted) by the time this function is called.
    num_classes: Number of output units. When None, `len(classes)` is used so
      the output layer always matches the label vocabulary.
    hidden_units: Width of the single hidden layer.

  Returns:
    The compiled tf.keras.Sequential model.
  """
  # Resolve the fall-backs at call time so a bare get_basic_model() keeps
  # working exactly as before.
  if normalization_layer is None:
    normalization_layer = normalizer
  if num_classes is None:
    num_classes = len(classes)

  model = tf.keras.Sequential([
    normalization_layer,
    tf.keras.layers.Dense(hidden_units, activation='relu'),
    # No activation here: the loss below is configured with from_logits=True.
    tf.keras.layers.Dense(num_classes)
  ])

  model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])
  return model

In [None]:
# Column names of the features in use: the first `number_of_attributes`
# entries of attributes_array.
numeric_feature_names = attributes_array[0:number_of_attributes]

# Narrow both splits down to those columns.
df = df_raw[numeric_feature_names]
df_test = df_test_raw[numeric_feature_names]

print(df_test)

# Test features stay a pandas DataFrame here; the permutation cell further
# down reads its .columns and converts to a tensor itself.
numeric_features_test = df_test[numeric_feature_names]

# Training features are handed to TensorFlow as a tensor.
numeric_features = tf.convert_to_tensor(df[numeric_feature_names])

# Normalization layer whose statistics are adapted on the training features only.
normalizer = tf.keras.layers.Normalization(axis=-1)
normalizer.adapt(numeric_features)

# Build the model and train it; `history` ends up bound to what fit() returns.
model = get_basic_model()
print('Starting training for',number_of_attributes,'attributes')
history = model.fit(
    numeric_features,
    target,
    batch_size=BATCH_SIZE,
    epochs=10,
    verbose=0,
    callbacks=[history],
)
train_accuracy = history.history['accuracy']
print('train_accuracy:', train_accuracy)

       10    15  17    22  24  29  31  ...   84     87     88     89     90  91  92
0      66   503  52   489  14  14   7  ...  398   6432  24616   5840  24334   0   0
1      66   465  52   451  14  14   6  ...  399   6432  24616   5840  24616   0   0
2      66   464  52   450  14  14   6  ...  398   6432  24616   5840  24616   0   0
3      66   465  52   451  14  14   6  ...  399   6432  24616   5840  24616   0   0
4      66   682  52   668  14  14   6  ...  399   6432  24616   5840  24334   0   0
...    ..   ...  ..   ...  ..  ..  ..  ...  ...    ...    ...    ...    ...  ..  ..
23796  60  1514  40  1500  14  20  14  ...   21  17520  58400  16384  56940   0   0
23797  60  1414  40  1400  14  20  16  ...   21  17680  58480  16384  57120   0   0
23798  60  1514  40  1500  14  20  14  ...  759  33580  61440  32821  61440   0   0
23799  60   989  40   975  14  20   7  ...  449  33580  61440  33580  61440   0   0
23800  60  1514  40  1500  14  20  24  ...    2  33580  61440  31845  61440 

In [None]:
# Permutation importance on the test split: shuffle one feature column at a
# time, re-evaluate the trained model, and compare against the baseline accuracy.
number_of_permutations = 33 # for each attribute
PERMUTATION_SEED = 42 # fixed so the shuffles are repeatable for a given trained model

# Baseline: accuracy on the unmodified test features.
test_loss, test_acc =  model.evaluate(numeric_features_test, target_test, verbose=0, batch_size=BATCH_SIZE)

permutation_rng = np.random.default_rng(PERMUTATION_SEED)
permutated_accuracy_means_array = []

# for every attribute
for attr in numeric_features_test.columns:
  print(f"Running attr: {attr}")

  # One working copy per attribute is enough: only column `attr` is ever
  # overwritten, always with a fresh shuffle of the original values.
  original_values = numeric_features_test[attr].values
  df_permutated = numeric_features_test.copy()

  permutated_accuracy_array = []
  # run n times for each attribute
  for permutation_index in range(number_of_permutations):
    df_permutated[attr] = permutation_rng.permutation(original_values)

    permutated_features = tf.convert_to_tensor(df_permutated)
    permutated_loss, permutated_acc = model.evaluate(permutated_features, target_test, verbose=0, batch_size=BATCH_SIZE)
    permutated_accuracy_array.append(permutated_acc)

  print('permutated_accuracy_array:',permutated_accuracy_array)
  permutated_accuracy_mean = float(np.mean(permutated_accuracy_array))
  print('permutated_accuracy_mean',permutated_accuracy_mean)
  # Guard the ratio so a baseline accuracy of zero does not abort the loop.
  ratio = permutated_accuracy_mean / test_acc if test_acc else float('nan')
  print(f"Original Accuracy: {test_acc:.3f} Permutated Accuracy: {permutated_accuracy_mean:.3f} Ratio: {ratio:.3f}\n")

  # Pair the mean with the column actually shuffled, not a parallel-list lookup.
  permutated_accuracy_means_array.append((permutated_accuracy_mean, attr))

# Ascending by mean permuted accuracy: attributes whose shuffling lowers
# accuracy the most come first (ties fall back to the attribute name).
permutated_accuracy_means_array = sorted(permutated_accuracy_means_array)
print('permutated_accuracy_means_array SORTED',permutated_accuracy_means_array)
attributes = [x for _, x in permutated_accuracy_means_array]
print('sorted attributes', attributes)

Running attr: 10
permutated_accuracy_array: [0.954917848110199, 0.9550859332084656, 0.9549598693847656, 0.955926239490509, 0.9551279544830322, 0.955548107624054, 0.9551699757575989, 0.954917848110199, 0.9557161331176758, 0.9551699757575989, 0.955548107624054, 0.955422043800354, 0.9548758268356323, 0.9551699757575989, 0.9561362862586975, 0.9553800225257874, 0.9550018906593323, 0.9552539587020874, 0.9550859332084656, 0.955295979976654, 0.9553380012512207, 0.9550859332084656, 0.955422043800354, 0.9542036056518555, 0.9555901288986206, 0.9551279544830322, 0.9550859332084656, 0.9547078013420105, 0.9552119374275208, 0.9551279544830322, 0.9552119374275208, 0.9559682607650757, 0.9551279544830322]
permutated_accuracy_mean 0.9552399805097869
Original Accuracy: 0.957 Permutated Accuracy: 0.955 Ratio: 0.998

Running attr: 15
permutated_accuracy_array: [0.9419352412223816, 0.9437838792800903, 0.9428175091743469, 0.9429015517234802, 0.941641092300415, 0.9427335262298584, 0.9426494836807251, 0.9422713