# In [2]:
'''This script is intended to study the accuracy of a neural
network model of the UNSW_NB15 wireless intrusion dataset.
'''
from __future__ import print_function

import math

from IPython import display
from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
#IMPORT KERAS LIBRARIES USING TENSORFLOW BACKEND
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils

# Keep DataFrame previews compact: at most 10 rows, floats with one decimal.
pd.set_option('display.max_rows', 10)
pd.set_option('display.float_format', '{:.1f}'.format)

# Load the UNSW-NB15 training and test partitions into pandas DataFrames.
# NOTE(review): both paths are absolute locations on one Windows machine;
# the script cannot run elsewhere without editing them.
unswnb15_train = pd.read_csv(
    "C:/Users/SYDNEY/Documents/Sydney/PhD/Python/UNSW-NB15/data/UNSW_NB15_TRAINING.csv",
    sep=",",
)
unswnb15_test = pd.read_csv(
    "C:/Users/SYDNEY/Documents/Sydney/PhD/Python/UNSW-NB15/data/UNSW_NB15_TEST.csv",
    sep=",",
)

#INTRUSIONS/ATTACKS TYPES MAPPING (One-hot-encoding can also be used)
# Each attack category is encoded by its position in the tuple below:
# 'Normal' -> 0, 'Generic' -> 1, ..., 'Worms' -> 9.
class_mapping = {
    category: code
    for code, category in enumerate((
        'Normal',
        'Generic',
        'Exploits',
        'Fuzzers',
        'DoS',
        'Reconnaissance',
        'Analysis',
        'Backdoor',
        'Shellcode',
        'Worms',
    ))
}
# Integer codes for the 'state' feature.
# The training and test files were previously encoded with two separate,
# partially overlapping tables: a state present in a file but absent from
# that file's table is turned into NaN by Series.map and then flows into the
# network. A single superset table removes that failure mode; every code that
# existed before keeps its value.
state_mapping = {
    'INT': 1,
    'FIN': 2,
    'CON': 3,
    'REQ': 4,
    'RST': 5,
    'ECO': 6,
    'PAR': 7,
    'no': 8,
    'URN': 9,
    'ACC': 10,
    'CLO': 11,
}
# Kept as a separate name for existing callers; it is deliberately the same
# table so the train and test encodings can never drift apart.
state_test_mapping = state_mapping

# Integer codes for the 'service' feature: consecutive values starting at 11,
# assigned in the order the services are listed ('-' -> 11 ... 'radius' -> 23).
service_mapping = {
    service: code
    for code, service in enumerate(
        (
            '-',
            'dns',
            'http',
            'smtp',
            'ftp-data',
            'ftp',
            'ssh',
            'pop3',
            'dhcp',
            'snmp',
            'ssl',
            'irc',
            'radius',
        ),
        start=11,
    )
}

# Integer codes for the 'proto' feature: consecutive values starting at 1,
# assigned in the order the protocol names are listed. Each row below holds
# ten names; the trailing comment gives the codes that row receives.
proto_mapping = {
    protocol: code
    for code, protocol in enumerate(
        (
            'tcp', 'udp', 'unas', 'arp', 'ospf', 'sctp', 'any', 'gre', 'ipv6', 'sun-nd',  # 1-10
            'swipe', 'pim', 'mobile', 'rsvp', 'sep', 'ib', 'sprite-rpc', 'ttp', 'smp', 'visa',  # 11-20
            'sps', 'vines', 'ipv6-frag', 'ipip', 'merit-inp', 'idpr', 'xtp', 'il', 'iatp', 'scps',  # 21-30
            'gmtp', 'pnni', 'pvp', 'mfe-nsp', 'vmtp', 'snp', 'ptp', 'vrrp', 'l2tp', 'sm',  # 31-40
            'wsn', 'qnx', 'ipv6-opts', 'zero', 'mtp', 'tp++', 'pipe', 'secure-vmtp', 'ipcomp', 'ipx-n-ip',  # 41-50
            'uti', 'ifmp', 'sat-mon', 'sdrp', 'ippc', 'bna', 'idpr-cmtp', 'encap', 'wb-mon', 'idrp',  # 51-60
            'crudp', 'fc', 'tlsp', 'wb-expak', 'larp', 'ddx', 'dgp', 'compaq-peer', 'rvd', 'fire',  # 61-70
            'a/n', 'ipv6-route', 'eigrp', 'iso-ip', 'mhrp', 'cftp', 'pri-enc', 'micp', 'srp', 'kryptolan',  # 71-80
            'ipv6-no', 'narp', 'ipcv', 'pgm', 'isis', 'ax.25', 'cpnx', '3pc', 'tcf', 'stp',  # 81-90
            'i-nlsp', 'aris', 'cphb', 'skip', 'etherip', 'br-sat-mon', 'ddp', 'sccopmce', 'aes-sp3-d', 'nsfnet-igp',  # 91-100
            'sat-expak', 'iplt', 'leaf-2', 'dcn', 'pup', 'nvp', 'trunk-1', 'cbt', 'trunk-2', 'crtp',  # 101-110
            'leaf-1', 'chaos', 'igp', 'iso-tp4', 'ggp', 'emcon', 'xnet', 'ip', 'ipnip', 'st2',  # 111-120
            'mux', 'irtp', 'prm', 'xns-idp', 'hmp', 'egp', 'rdp', 'netblt', 'bbn-rcc', 'argus',  # 121-130
            'igmp', 'icmp', 'rtp',  # 131-133
        ),
        start=1,
    )
}

#STEPS 1-4: ENCODE THE CATEGORICAL COLUMNS USING .map(dict_name)
# Each row is (column, dict applied to the training frame, dict applied to
# the test frame). 'state' goes through state_mapping for the training frame
# and state_test_mapping for the test frame; the other columns share one dict.
# Series.map leaves NaN wherever a value is not a key of the dict it is given.
for _column, _train_codes, _test_codes in (
    ("attack_cat", class_mapping, class_mapping),
    ("state", state_mapping, state_test_mapping),
    ("service", service_mapping, service_mapping),
    ("proto", proto_mapping, proto_mapping),
):
    unswnb15_train[_column] = unswnb15_train[_column].map(_train_codes)
    unswnb15_test[_column] = unswnb15_test[_column].map(_test_codes)
# Remove the loop helpers so no extra module-level names are left behind.
del _column, _train_codes, _test_codes

#Normalization function
def log_normalize(series):
    """Return ``log(1 + x)`` for every element of ``series``.

    The transform is computed with the vectorized ``np.log1p`` instead of a
    per-element Python call, which is both much faster on large columns and
    accurate for values close to zero (``math.log(x + 1.0)`` rounds tiny
    ``x`` away entirely).

    Args:
        series: numeric pandas Series; every value must be greater than -1.
            NaN entries are passed through as NaN.

    Returns:
        A Series with the same index as ``series`` holding the transformed
        values.

    Raises:
        ValueError: if any value is less than or equal to -1, where the
            logarithm is undefined.
    """
    # np.log1p would quietly produce -inf/NaN for out-of-domain input; fail
    # loudly instead, as the element-wise math.log version did.
    if (series <= -1).any():
        raise ValueError("log_normalize requires every value to be greater than -1")
    return np.log1p(series)

def process_features(dataset, n_features=41):
    '''Return the log-normalized leading feature columns of ``dataset``.

    The first ``n_features`` columns (in the frame's column order) are each
    passed through ``log_normalize`` and collected into a new DataFrame that
    keeps the original column names and row index.

    NOTE(review): the selection is purely positional, so any identifier or
    bookkeeping column that happens to sit among the first ``n_features``
    columns is fed to the model as a feature -- confirm the CSV layout.

    Args:
        dataset: pandas DataFrame whose leading columns are numeric features.
        n_features: how many leading columns to use. Defaults to 41, the
            value that used to be hard-coded here.

    Returns:
        A DataFrame with ``n_features`` log-normalized columns.

    Raises:
        IndexError: if ``dataset`` has fewer than ``n_features`` columns.
    '''
    # Retrieve column names
    col_names = list(dataset)
    if n_features > len(col_names):
        raise IndexError(
            "dataset has %d columns, cannot take the first %d as features"
            % (len(col_names), n_features)
        )

    # Start from the dataset's own index so every column aligns row-for-row.
    processed_features = pd.DataFrame(index=dataset.index)
    for name in col_names[:n_features]:
        processed_features[name] = log_normalize(dataset[name])

    return processed_features

def process_targets(dataset):
    """Extract the 'label' column of ``dataset`` as a one-column DataFrame.

    The returned frame has a single column named 'label' and the same row
    index as ``dataset``.
    """
    return pd.DataFrame({'label': dataset['label']})

#Random Permutations
# Shuffle the rows of both frames so the split below is random.
# NOTE: no random seed is set, so the split differs from run to run.
unswnb15_train = unswnb15_train.reindex(np.random.permutation(unswnb15_train.index))
unswnb15_test = unswnb15_test.reindex(np.random.permutation(unswnb15_test.index))

# 75 % / 25 % split of the training frame.
# The row counts are derived from the frame length instead of being
# hard-coded: for a 175341-row frame this yields 131506 / 43835 exactly as
# before, and for any other size the two parts remain disjoint and complete
# (fixed head()/tail() counts would overlap or leave rows unused).
train_rows = int(round(0.75 * len(unswnb15_train)))
training_frame = unswnb15_train.iloc[:train_rows]
validation_frame = unswnb15_train.iloc[train_rows:]

# The network defined further down ends in a 2-unit softmax, so the one-hot
# targets are pinned to two columns even if a split happens to contain a
# single class.
num_label_classes = 2

#75 % training data - used for training our models
training_data_features = process_features(training_frame)
training_data_labels = process_targets(training_frame)
encode_training_targets = np_utils.to_categorical(
    training_data_labels, num_classes=num_label_classes)

#25 % training data / validation - used for validating our models
validation_data_features = process_features(validation_frame)
validation_data_labels = process_targets(validation_frame)
encode_validation_targets = np_utils.to_categorical(
    validation_data_labels, num_classes=num_label_classes)

# Test features and targets
test_data_features = process_features(unswnb15_test)
test_data_labels = process_targets(unswnb15_test)
encode_test_targets = np_utils.to_categorical(
    test_data_labels, num_classes=num_label_classes)


#Build the neural network model
# Fully connected classifier: 41 inputs -> three 50-unit ReLU layers ->
# 2-unit softmax output.
nn_model = Sequential([
    Dense(50, input_dim=41, activation='relu'),
    Dense(50, activation='relu'),
    Dense(50, activation='relu'),
    Dense(2, activation='softmax'),
])

# Compiling model
nn_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Training a model / Fit Model
# The value returned by fit() is kept under the name neural_net_model.
neural_net_model = nn_model.fit(
    training_data_features,
    encode_training_targets,
    validation_data=(validation_data_features, encode_validation_targets),
    epochs=100,
    batch_size=1000,
    verbose=0,
)

#### evaluate the model ####
# With metrics=['accuracy'], element 1 of each evaluate() result is read
# below as the accuracy.
#validation Scores
validation_scores = nn_model.evaluate(
    validation_data_features, encode_validation_targets)
#Test Scores
test_scores = nn_model.evaluate(test_data_features, encode_test_targets)

validation_accuracy_pct = validation_scores[1]*100
test_accuracy_pct = test_scores[1]*100
print("\n Validation Accuracy: %.2f%%" % (validation_accuracy_pct))
print("\n Test Accuracy: %.2f%%" % (test_accuracy_pct))



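# Output captured from the notebook run of the cell above. The first two
# lines appear to be the tail of a TensorFlow deprecation warning.
#
# Instructions for updating:
# Use tf.cast instead.
#
#  Validation Accuracy: 96.77%
#
#  Test Accuracy: 58.83%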
