In [1]:
# Library to read excel data
import pandas as pd 
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
def readData():

    # Read csv data
    print("---  Reading csv data            ---")
    dirpath = "resources/"
    filename = "kddcup99_csv.csv"
    kddCup = pd.read_csv(dirpath+filename)

    # Shuffle data
    print("---  Shuffle data                 ---")
    kddCup = kddCup.sample(frac=1)

    # Split data in 70% 15% 15%
    print("---  Split data in train, validate and test  ---")
    trainingData, validateData, testData = np.split(kddCup.sample(frac=1), [int(.7*len(kddCup)), int(.85*len(kddCup))])

    print("---  TrainingData 70% length = " + str(len(trainingData))+" ---")
    print("---  ValidateData 15% lenght = " + str(len(validateData))+" ---")
    print("---  TestData 15% length     = " + str(len(testData))+" ---")
    return trainingData, validateData, testData

In [3]:

def defineModel():
    print("---  Creating RNN Model          ---")
    # Define new Model for rnn
    model = tf.keras.models.Sequential()
    # Adding model first layer with 41 features (42 - label feature)
    model.add(tf.keras.layers.SimpleRNN(41))
    # Adding Hidden Layers
    # Adding Activation sigmoid on Hidden Layers
    model.add(tf.keras.layers.Dense(units=80,activation='sigmoid'))
    model.add(tf.keras.layers.Dense(units=160,activation='sigmoid'))
    model.add(tf.keras.layers.Dense(units=240,activation='sigmoid'))
    # Adding output layer (normal(0) - anomaly(1))
    model.add(tf.keras.layers.Dense(units=2,activation='softmax'))
    # Adding learning rate and metrics
    model.compile(optimizer=tf.keras.optimizers.Nadam(learning_rate=0.001),
                loss=tf.keras.losses.mean_squared_error,
                metrics=['accuracy'])
    return model


In [4]:

def showSummary(result,validationData):
    printStats(getStats(validationData,result))

def getStats(real,predicted):
    numClasses = real.shape[1]
    stats = []
    for i in range(0,numClasses):
        stats.append([])
        for j in range(0,numClasses):
            stats[i].append(0)
    for i in range(0,len(predicted)):
        p = np.argmax(predicted[i])
        r = np.argmax(real[i])
        stats[p][r]+=1
    metrics = {}
    for i in range(0,numClasses):
        tp = stats[i][i]
        fp = 0
        fn = 0
        for j in range(0,numClasses):
            if (i!=j):
                fp += stats[i][j]
                fn += stats[j][i]
        if (tp+fp) > 0: precision = tp/(tp+fp)
        else: precision = -1
        if (tp+fn) > 0: recall = tp/(tp+fn)
        else: recall = -1
        if precision >= 0 and recall >= 0 and (precision+recall>0): f1 = 2*precision*recall/(precision+recall)        
        else: f1 = -1
        metrics[i]= {'precision': precision,
                     'recall': recall,
                     'f1': f1 }
    return stats,metrics

def printStats(stats):
    cm,m=stats
    print("\nConfusion Matrix")
    print("P \ R\tnormal\tanomaly")
    print("normal\t"+str(cm[0][0])+"\t"+str(cm[0][1]))
    print("anomaly\t"+str(cm[1][0])+"\t"+str(cm[1][1]))
    print("")
    print("Metrics")
    print("Conexion\tPrecision\tRecall\tF-1")
    print("normal\t"+str(m[0]['precision'])+"\t"+str(m[0]['recall'])+"\t"+str(m[0]["f1"]))
    print("anomaly\t"+str(m[1]['precision'])+"\t"+str(m[1]['recall'])+"\t"+str(m[1]["f1"]))
    print("\n")
  

In [5]:
def normalizeColumn(data):
   arrayData = np.array(data)
   normalizedData = preprocessing.normalize(arrayData)
   return normalizedData

In [6]:
def transformColumn(column, array, default):
   rows = list(set(column.tolist()))
   for row in rows:
      try:
         column = column.replace([row],array[row])
      except KeyError:
         column = column.replace([row],default)
   return column
# Library to transform data
def transformData(kddCup):
    # Transform Text data to number
    protocolTypeLabel = {'icmp': 0, 'tcp': 1, 'udp': 2}
    kddCup["protocol_type"] = transformColumn(kddCup["protocol_type"],protocolTypeLabel,2)
    # print(kddCup["protocol_type"])
    flagLabel = {'OTH': 0, 'REJ': 1, 'RSTO': 2, 'RSTOS0': 3, 'RSTR': 4, 'S0': 5, 'S1': 6, 'S2': 7, 'S3': 8, 'SF': 9, 'SH': 10}
    kddCup["flag"] = transformColumn(kddCup["flag"],flagLabel,1)
    # print(kddCup["flag"])
    serviceLabel = {'auth': 0, 'bgp': 1, 'courier': 2, 'csnet_ns': 3, 'ctf': 4,'daytime': 5, 'discard': 6,
               'domain': 7, 'domain_u': 8, 'echo': 9, 'eco_i': 10, 'ecr_i': 11, 'efs': 12,
               'exec': 13, 'finger': 14, 'ftp': 15, 'ftp_data': 16, 'gopher': 17, 'hostnames': 18,
               'http': 19, 'http_443': 20, 'imap4': 21, 'IRC': 22, 'iso_tsap': 23, 'klogin': 24, 'kshell': 25,
               'ldap': 26, 'link': 27, 'login': 28, 'mtp': 29, 'name': 30, 'netbios_dgm': 31, 'netbios_ns': 32,
               'netbios_ssn': 33, 'netstat': 34, 'nnsp': 35, 'nntp': 36, 'ntp_u': 37, 'other': 38,
               'pm_dump': 39, 'pop_2': 40, 'pop_3': 41, 'printer': 42, 'private': 43, 'red_i': 44,
               'remote_job': 45, 'rje': 46, 'shell': 47, 'smtp': 48, 'sql_net': 49, 'ssh': 50,
               'sunrpc': 51, 'supdup': 52, 'systat': 53, 'telnet': 54, 'tftp_u': 55, 'tim_i': 56, 'time': 57,
               'urh_i': 58, 'urp_i': 59, 'uucp': 60, 'uucp_path': 61, 'vmnet': 62, 'whois': 63,
               'X11': 64, 'Z39_50': 65}
    kddCup["service"] = transformColumn(kddCup["service"],serviceLabel,19)          
    transformLabel = {'normal': 0}
    kddCup["label"] = transformColumn(kddCup["label"],transformLabel,1)
    # print(kddCup["label"])
    return kddCup


In [7]:

def trainModel(epochs,  model, trainingData, validateData):
    print("---  Transform and Normalize Training Data    ---")
    TrainingDataX, TrainingDataY = transformData(trainingData)
    # Training Model
    print("---  Training model    ---")
    model.fit(TrainingDataX,TrainingDataY,epochs=epochs,batch_size=25,shuffle=True)
    # validation_data=(validateDataX,validateDataY))
    results = model.predict(TrainingDataX)

    print("\n\n---  Results:            ---")
    showSummary(TrainingDataY,results)
    return model

def validateModel(model, validateData):
    print("---  Transform and Normalize Validate Data    ---")
    validateDataX, ValidateDataY = transformData(validateData)
    # Validate Model
    print("---  Validate model    ---")
    results = model.predict(validateDataX)
    print("\n\n---  Results:            ---")
    showSummary(ValidateDataY,results)
    return model

def transformData(data):
    # Transform text to number
    print("---   Transform text to number        ---")
    data = transformData(data)
    data["label"]
    # Split and remove columns
    print("---   Remove unused columns           ---")
    kddCupY = data["label"]
    kddCupY = kddCupY.to_numpy()   
    # Assing category data shape 
    kddCupY = keras.utils.to_categorical(kddCupY,2)
    kddCupX = data
    kddCupX.pop("label")
    kddCupX = kddCupX.to_numpy()
    # Normalize data
    print("---  Normalize Colum Data            ---")
    normalizeDataX = normalizeColumn(kddCupX)
    # Reshape
    print("---  Reshaping Data                  ---")
    normalizeDataX = normalizeDataX.reshape((normalizeDataX.shape[0],normalizeDataX.shape[1],-1))
    return normalizeDataX, kddCupY


In [8]:
# Program header
print("---  Simple RNN Neural Network   ---")
print("---  Starting Neural Network     ---")

# Get Data
trainingData, validateData, testData = readData()
# Create Model
model = defineModel()
# Train Model
model = trainModel(3, model, trainingData, validateData)
# Validate Model
option = validateModel(model, validateData)

    ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to number        ---
---   Transform text to nu