In [5]:
import pandas as pd
import numpy as np
from tensorflow import keras
from keras import layers, applications, utils, models, optimizers, Input, callbacks
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN
from sklearn.model_selection import train_test_split

# Code pour modèles et préparation des données

## Modèles

In [6]:
def dense_network(input_sequence_shape):
    simple_model = keras.Sequential(
        [
            layers.Dense(
                128, input_shape=(20, input_sequence_shape), activation="relu"
            ),
            # layers.BatchNormalization(), # les batchNormalization fond baisser l'accuracy
            layers.Dense(256, activation="relu"),
            # layers.BatchNormalization(),
            layers.Dense(64, activation="relu"),
            layers.Dense(1, activation="linear"),  # avec relu on perd un peu d'accuracy
        ]
    )
    return simple_model


def stacked_RNN(
    hidden_units=32, dense_units=1, input_shape=(20, 13), activation=["relu", "relu"]
):
    """
    hidden_units : nombre de neurones dans la couche SimpleRNN
    dense_units : nombre de neurones dans la couche Dense
    activation : liste des deux fonctions d'activation
    """

    model = Sequential()
    model.add(
        SimpleRNN(
            hidden_units,
            input_shape=input_shape,
            return_sequences=True,
            activation=activation[0],
        )
    )
    model.add(SimpleRNN(32, activation=activation[0]))
    model.add(keras.layers.BatchNormalization())
    model.add(Dense(64, activation=activation[1]))
    model.add(Dense(units=dense_units, activation="sigmoid"))

    return model


def stacked_LSTM_small(input_sequence_shape):
    lstm_model = keras.Sequential(
        [
            layers.Dense(32, input_shape=(20, input_sequence_shape), activation="relu"),
            layers.LSTM(128, return_sequences=True),
            layers.LSTM(128),
            # layers.BatchNormalization(),
            layers.Dense(64, activation="relu"),
            layers.Dense(1, activation="sigmoid"),
        ]
    )
    return lstm_model


# Mix
def mix_rnn_lstm(input_sequence_shape):
    lstm_rnn_model = keras.Sequential(
        [
            layers.Dense(32, input_shape=(20, input_sequence_shape), activation="relu"),
            layers.LSTM(
                128, return_sequences=True, activation="relu"
            ),  # return_sequences à True pour que la sortie soit de dimension 3
            layers.BatchNormalization(),
            layers.Dense(64, activation="relu"),
            layers.SimpleRNN(64, activation="relu", return_sequences=True),
            layers.SimpleRNN(32, activation="relu"),
            layers.BatchNormalization(),
            layers.Dense(64, activation="relu"),
            # layers.Dropout(0.5),
            layers.Dense(1, activation="sigmoid"),
        ]
    )
    return lstm_rnn_model


# Single GRU layer of 256 units
def simple_GRU(input_sequence_shape):
    simple_GRU = keras.models.Sequential()
    simple_GRU.add(
        keras.layers.Dense(
            32, input_shape=(20, input_sequence_shape), activation="relu"
        )
    )
    simple_GRU.add(keras.layers.Dropout(0.2))
    simple_GRU.add(keras.layers.BatchNormalization())
    simple_GRU.add(keras.layers.GRU(256, return_sequences=False, activation="relu"))
    simple_GRU.add(keras.layers.Dropout(0.2))
    simple_GRU.add(keras.layers.BatchNormalization())
    # simple_GRU.add( keras.layers.Dense(64, activation='relu') )
    # simple_GRU.add( keras.layers.Dropout(0.2) )
    simple_GRU.add(keras.layers.Dense(1, activation="sigmoid"))
    return simple_GRU


# 3-stacked GRU model
def stacked_GRU(input_sequence_shape):
    stacked_GRU_model = keras.models.Sequential()
    stacked_GRU_model.add(
        keras.layers.Dense(
            32, input_shape=(20, input_sequence_shape), activation="relu"
        )
    )
    stacked_GRU_model.add(keras.layers.Dropout(0.2))
    stacked_GRU_model.add(keras.layers.BatchNormalization())
    stacked_GRU_model.add(
        keras.layers.GRU(256, return_sequences=True, activation="relu")
    )
    stacked_GRU_model.add(
        keras.layers.GRU(256, return_sequences=True, activation="relu")
    )
    stacked_GRU_model.add(
        keras.layers.GRU(256, return_sequences=False, activation="relu")
    )
    stacked_GRU_model.add(keras.layers.Dropout(0.2))
    stacked_GRU_model.add(keras.layers.BatchNormalization())
    # stacked_GRU_model.add( keras.layers.Dense(64, activation='relu') )
    # stacked_GRU_model.add( keras.layers.Dropout(0.2) )
    stacked_GRU_model.add(keras.layers.Dense(1, activation="sigmoid"))
    return stacked_GRU_model


# 3-stacked LSTM model
def stacked_LSTM(input_sequence_shape):
    stacked_LSTM_model = keras.models.Sequential()
    stacked_LSTM_model.add(
        keras.layers.Dense(
            32, input_shape=(20, input_sequence_shape), activation="relu"
        )
    )
    stacked_LSTM_model.add(keras.layers.Dropout(0.2))
    stacked_LSTM_model.add(keras.layers.BatchNormalization())
    stacked_LSTM_model.add(
        keras.layers.LSTM(256, return_sequences=True, activation="relu")
    )
    stacked_LSTM_model.add(
        keras.layers.LSTM(256, return_sequences=True, activation="relu")
    )
    stacked_LSTM_model.add(
        keras.layers.LSTM(256, return_sequences=False, activation="relu")
    )
    stacked_LSTM_model.add(keras.layers.Dropout(0.2))
    stacked_LSTM_model.add(keras.layers.BatchNormalization())
    # stacked_LSTM_model.add( keras.layers.Dense(64, activation='relu') )
    # stacked_LSTM_model.add( keras.layers.Dropout(0.2) )
    stacked_LSTM_model.add(keras.layers.Dense(1, activation="sigmoid"))
    return stacked_LSTM_model


# 3-stacked RNN model
def stacked_RNN(input_sequence_shape):
    stacked_RNN_model = keras.models.Sequential()
    stacked_RNN_model.add(
        keras.layers.Dense(
            32, input_shape=(20, input_sequence_shape), activation="relu"
        )
    )
    stacked_RNN_model.add(keras.layers.Dropout(0.2))
    stacked_RNN_model.add(keras.layers.BatchNormalization())
    stacked_RNN_model.add(
        keras.layers.SimpleRNN(256, return_sequences=True, activation="relu")
    )
    stacked_RNN_model.add(
        keras.layers.SimpleRNN(256, return_sequences=True, activation="relu")
    )
    stacked_RNN_model.add(
        keras.layers.SimpleRNN(256, return_sequences=False, activation="relu")
    )
    stacked_RNN_model.add(keras.layers.Dropout(0.2))
    stacked_RNN_model.add(keras.layers.BatchNormalization())
    # stacked_RNN_model.add( keras.layers.Dense(64, activation='relu') )
    # stacked_RNN_model.add( keras.layers.Dropout(0.2) )
    stacked_RNN_model.add(keras.layers.Dense(1, activation="sigmoid"))
    return stacked_RNN_model

## Préparation des données

In [7]:
def import_dataset(
    dataset_path,
    columns=[
        "distance",
        "distRealSR1",
        "pos_y_rec_f",
        "pos_y_rec",
        "pos_x_rec_f",
        "pos_x_rec",
        "nb_packets_sent",
        "label",
    ],
    data_type={
        "label":"int8"
    }
):
    # Import du csv
    data = pd.read_csv(
        dataset_path,
        usecols=columns,
        index_col=False,
        dtype=data_type
    )

    print("Nombre de lignes avant nettoyage : ", data.shape[0])

    # On remplace les données infinies par nan si elles existent
    data.replace([np.inf, -np.inf], np.nan, inplace=True)

    # Drop les lignes avec nan
    data.dropna(inplace=True)
    print("Nombre de lignes après nettoyage : ", data.shape[0])

    return data


def sample_dataset(dataset, sample_nb):
    return dataset.sample(sample_nb)

def flat_sequence_creation(df):
    senders_sequences = []
    senders_label = []
    senders = np.unique(df["sender"].values)
    for sender in senders:
        # Données d'un seul sender rangée en fonction de l'heure d'envoi
        sender_data_sorted = df.loc[df['sender'] == sender].sort_values("sendTime")

        # On récupère la valeur du label pour ce sender
        """ On remplasse toute les valeur !=0 en 1 """
        if sender_data_sorted['label'].tolist()[0] != 0 :
            label=1
        else :
            label = sender_data_sorted['label'].tolist()[0]
        #On supprime les colonnes label et sender
        sender_data_sorted = sender_data_sorted.drop(["label","sender"], axis=1)
        
        #sequence_array = []

        length = sender_data_sorted.shape[0]
        slide = 10
        start = 0
        end = 20

        # On vérifie qu'il est possible de faire une séquence de taille 20
        while length > 20:
            # Extraction par tranche de 20 avec une inter de 10
            sequence = sender_data_sorted[start:end]

            # Labels correspondant
            #labels =  pd.Series.tolist(sequence["label"])

            # On transforme les 13 en 1, cette formule marche toujours si on met d'autres types d'attaques
            #labels[:] = [x if x == 0 else 1 for x in labels]

            # Attribution des tableaux numpy
            senders_sequences.append(np.array(sequence.values.tolist(), dtype=np.float32))
            senders_label.append(label)

            # Mise à jour des variables
            start += slide
            end += slide
            length -= 10
        
    print('Nombre de séquences : ',len(senders_sequences))
    return senders_sequences, senders_label

def data_preparation(df, sample=False, test_size=0.1):
   
    sorted_dataset = df.sort_values("sender")
    sequence_test, label_test = flat_sequence_creation(sorted_dataset)

    # Transformation en array numpy
    X = np.array(sequence_test)
    y = np.array(label_test, dtype=np.float32)

    # Réduire le temps de training en prenant les 100000 premiers éléments
    if sample:
        X = X[:100000]
        y = y[:100000]
    # Séparation en données d'entrainement et de test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
    print("X_train : ", X_train.shape)
    print("y_train : ", y_train.shape)
    print("X_test : ", X_test.shape)
    print("y_test : ", y_test.shape)
    
    return X_train, X_test, y_train, y_test

# Import des données

In [8]:
# Chemin du dossier contenant tous les .csv
data_folder = '../data'

In [12]:
# selected_columns = [
# "nb_packets_sent",
# "distance",
# "distRealSR1",
# "pos_y_rec_f",
# "pos_y_rec",
# "pos_x_rec_f",
# "pos_x_rec",
# "pos_x_send",
# "pos_y_send",
# "spd_x_send",
# "spd_y_send",
# "sendTime",
# "receiver",
# "sender",
# "label"
# ]
selected_columns = [
"distance",
"pos_x_send",
"pos_y_send",
"spd_x_send",
"spd_y_send",
"sendTime",
"sender",
"label"
]

input_sequence_shape = len(selected_columns)-2 # On drop label et sender dans la création de séquences

data_type = {
    "label":"int8",
    "sender":"int16",
    "receiver":"int16",
    "nb_packets_sent":"int16"
    
}

Import de la base de données

In [None]:
# Colonnes choisies - label, sender & sendTime obligatoires
# selected_columns = [
# "nb_packets_sent",
# "distance",
# "distRealSR1",
# "pos_y_rec_f",
# "pos_y_rec",
# "pos_x_rec_f",
# "pos_x_rec",
# "pos_x_send",
# "pos_y_send",
# "spd_x_send",
# "spd_y_send",
# "sendTime",
# "receiver",
# "sender",
# "label"
# ]
selected_columns = [
"distance",
"pos_x_send",
"pos_y_send",
"spd_x_send",
"spd_y_send",
"sendTime",
"sender",
"label"
]

input_sequence_shape = len(selected_columns)-2 # On drop label et sender dans la création de séquences

data_type = {
    "label":"int8",
    "sender":"int16",
    "receiver":"int16",
    "nb_packets_sent":"int16"
    
}

In [13]:
# DoS (13)
df_13 = import_dataset(data_folder + '/DoS_0709_new_columns.csv', selected_columns, data_type)

Nombre de lignes avant nettoyage :  4753433
Nombre de lignes après nettoyage :  4753433


In [14]:
# DoS Random (14)
df_14 = import_dataset(data_folder + '/DoS_Random_0709_new_columns.csv', selected_columns, data_type)

Nombre de lignes avant nettoyage :  4679311
Nombre de lignes après nettoyage :  4679311


In [None]:
# DoS Disruptive (15)
df_15 = import_dataset(data_folder + '/DoS_Disruptive_0709_new_columns.csv', selected_columns)

In [None]:
# DoS Random Sybil (18)
df_18 = import_dataset(data_folder + '/DoS_Random_Sybil_0709_new_columns.csv', selected_columns)

In [None]:
# DoS Disruptive Sybil (19)
df_19 = import_dataset(data_folder + '/DoS_Disruptive_Sybil_0709_new_columns.csv', selected_columns)

In [15]:
# attack_types = [13, 14, 15, 18, 19]
attack_types = [13, 14]
# dataframes = [df_13, df_14, df_15, df_18, df_19]
dataframes = [df_13, df_14]

# Entrainement des modèles pour chaque dataset

In [16]:
EPOCHS = 20
VAL_SPLIT = 0.3
ADAM = keras.optimizers.Adam(learning_rate=0.0003)
callback = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

In [18]:
evaluation_dataframe = pd.DataFrame()
for i in range(len(dataframes)):
    X_train, X_test, y_train, y_test = data_preparation(dataframes[i], sample=False)

    # 2-stacked RNN
    print("\nTraining 2 stacked RNN model\n")
    rnn_model = stacked_RNN(input_sequence_shape=input_sequence_shape)
    rnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics='accuracy')
    history = rnn_model.fit(X_train, y_train, epochs=EPOCHS, batch_size=128, validation_split=VAL_SPLIT, callbacks=[callback])
    eval=rnn_model.evaluate(X_test, y_test)
    evaluation_dataframe = pd.concat([evaluation_dataframe, pd.DataFrame({'dataset':[attack_types[i]], 'modele':['2-stacked-rnn'], 'accuracy':[eval[1]]})])

    # stacked LSTM small
    print("\nTraining stacked small LSTM model\n")
    lstm_model = stacked_LSTM_small(input_sequence_shape)
    opt = keras.optimizers.SGD(learning_rate=0.01)
    lstm_model.compile(loss='binary_crossentropy', optimizer=opt, metrics='accuracy')
    history = lstm_model.fit(X_train, y_train, epochs=EPOCHS, batch_size=32, validation_split=VAL_SPLIT, callbacks=[callback])
    eval=lstm_model.evaluate(X_test, y_test)
    evaluation_dataframe = pd.concat([evaluation_dataframe, pd.DataFrame({'dataset':[attack_types[i]], 'modele':['stacked-small-lstm'], 'accuracy':[eval[1]]})])

    # mix lstm rnn
    print("\nTraining RNN-LSTM model\n")
    lstm_rnn_model = mix_rnn_lstm(input_sequence_shape)
    opt = keras.optimizers.SGD(learning_rate=0.01)
    lstm_rnn_model.compile(loss='binary_crossentropy', optimizer=opt, metrics='accuracy')
    history = lstm_rnn_model.fit(X_train, y_train, epochs=EPOCHS, batch_size=32, validation_split=VAL_SPLIT, callbacks=[callback])
    eval=lstm_rnn_model.evaluate(X_test, y_test)
    evaluation_dataframe = pd.concat([evaluation_dataframe, pd.DataFrame({'dataset':[attack_types[i]], 'modele':['lstm-rnn'], 'accuracy':[eval[1]]})])

    # 3 Stacked GRU
    print("\nTraining 3 stacked GRU\n")
    stacked_GRU_model = stacked_GRU(input_sequence_shape)
    opt = keras.optimizers.SGD(learning_rate=0.01)
    stacked_GRU_model.compile(loss='binary_crossentropy', optimizer=opt, metrics='accuracy')
    history = stacked_GRU_model.fit(X_train, y_train, epochs=EPOCHS, batch_size=64, validation_split=VAL_SPLIT, callbacks=[callback])
    eval=stacked_GRU_model.evaluate(X_test, y_test)
    evaluation_dataframe = pd.concat([evaluation_dataframe, pd.DataFrame({'dataset':[attack_types[i]], 'modele':['3-stacked-GRU'], 'accuracy':[eval[1]]})])

    # simple GRU
    print("\nTraining simple GRU\n")
    simple_GRU_model = simple_GRU(input_sequence_shape)
    opt = keras.optimizers.SGD(learning_rate=0.01)
    simple_GRU_model.compile(loss='binary_crossentropy', optimizer=opt, metrics='accuracy')
    history = simple_GRU_model.fit(X_train, y_train, epochs=EPOCHS, batch_size=64, validation_split=VAL_SPLIT, callbacks=[callback])
    eval=simple_GRU_model.evaluate(X_test, y_test)
    evaluation_dataframe = pd.concat([evaluation_dataframe, pd.DataFrame({'dataset':[attack_types[i]], 'modele':['simple-gru'], 'accuracy':[eval[1]]})])

    # stacked LSTM
    print("\nTraining 3 stacked LSTM\n")
    stacked_LSTM_model = stacked_LSTM(input_sequence_shape)
    stacked_LSTM_model.compile(loss='mean_absolute_error', optimizer=ADAM, metrics='accuracy')
    history = stacked_LSTM_model.fit(X_train, y_train, epochs=EPOCHS, batch_size=64, validation_split=VAL_SPLIT, callbacks=[callback])
    eval = stacked_LSTM_model.evaluate(X_test, y_test)
    evaluation_dataframe = pd.concat([evaluation_dataframe, pd.DataFrame({'dataset':[attack_types[i]], 'modele':['3-stacked-LSTM'], 'accuracy':[eval[1]]})])

Nombre de séquences :  469053
X_train :  (900, 20, 6)
y_train :  (900,)
X_test :  (100, 20, 6)
y_test :  (100,)
Training 2 stacked RNN model
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Training stacked small LSTM model
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Training RNN-LSTM model
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Training 3 stacked GRU
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Training simple GRU
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Training 3 stacked LSTM
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Nombre de séquences :  461621
X_train :  (900, 20, 6)
y_train :  (900,)
X_test :  (100, 20, 6)
y_test :  (100,)
Training 2 stacked RNN model
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Training stacked small LSTM model
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/2

In [21]:
evaluation_dataframe.to_csv('results_accuracy.csv')