In [1]:
from uuid import uuid4
import pandas as pd
import numpy as np
from sklearn import metrics
from datetime import datetime
from sklearn.preprocessing import StandardScaler
import keras
import os
import logging
# Configure the root logger so DEBUG-level records (and above) are emitted.
logging.basicConfig(level=logging.DEBUG)

# Load the saved Keras model once, at notebook start-up. The path is relative
# to the current working directory.
# NOTE(review): presumably the trained autoencoder used for scoring; its use is
# not visible in this part of the notebook - confirm.
loaded_model = keras.models.load_model("./autoencoder_model")
def hex_lambda(x):
    """Interpret ``x`` as base-16 text and return the corresponding integer."""
    return int(x, 16)

Metal device set to: Apple M1 Pro


2022-03-24 20:47:14.823385: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-03-24 20:47:14.823481: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [2]:
# Ordered list of column names.
# NOTE(review): presumably the column layout of the scored output rows; the
# code that consumes this list is not visible here - confirm.
# NOTE(review): 'timestamp' is listed twice (2nd and 4th entries). If this list
# is used to select columns (e.g. df[column_order]) the result will contain a
# duplicated 'timestamp' column - verify whether the second entry is intended.
column_order = ['uuid', 'timestamp', 'length','timestamp','ip.src','ip.dst','protocol','protocol.sport',
                'protocol.dport','source_internal','source_external','destination_internal',
                'destination_external', 'score']

In [9]:
def _ipv4_octet_columns(addresses, side):
    """Split dotted address strings into four per-octet columns.

    Returns a dict mapping ``octet_<n>_ip_<side>`` (n = 1..4) to the list of
    that octet's text for every address, in input order. An address with
    fewer than four dot-separated parts raises ``IndexError``.
    """
    split_addresses = [address.split('.') for address in addresses]
    return {
        f'octet_{position + 1}_ip_{side}': [parts[position] for parts in split_addresses]
        for position in range(4)
    }


def _packets_per_window(timestamps, window_seconds=10):
    """For every row, count the rows whose timestamp falls in the same window.

    ``timestamps`` is a Series of epoch seconds. Windows are consecutive
    ``window_seconds``-wide intervals, closed on the left. The result is
    aligned row-for-row with ``timestamps``.
    """
    window_id = timestamps // window_seconds
    return window_id.map(window_id.value_counts())


def process_df(unprocessed_df):
    """Turn a raw packet table into the numeric feature frame.

    Reads these columns of ``unprocessed_df``: ``ip.src``, ``ip.dst``
    (dotted strings), ``timestamp`` (epoch seconds), ``protocol``, and the
    pass-through columns ``length``, ``protocol.sport``, ``protocol.dport``,
    ``source_internal``, ``source_external``, ``destination_internal``,
    ``destination_external``.

    Returns a new ``float64`` DataFrame with a fresh 0..n-1 index and, in this
    order: the four source octets, the four destination octets, the
    pass-through columns, ``TCP`` and ``UDP`` indicator columns, and
    ``packet_flow`` (number of packets in the row's 10-second window).
    Every column except the indicator/flag columns is standardised with a
    ``StandardScaler`` fitted on that same column. An empty input yields an
    empty frame with the same columns.

    NOTE(review): the scaler is fitted on the frame being processed, so the
    scaled values depend on the batch; confirm this matches how the model's
    training data was prepared.
    """
    features = {}

    # IP addresses: one feature per octet, source first, then destination.
    features.update(_ipv4_octet_columns(unprocessed_df['ip.src'], 'src'))
    features.update(_ipv4_octet_columns(unprocessed_df['ip.dst'], 'dst'))

    # Columns copied from the input unchanged.
    list_of_cols_straight = ['length', 'protocol.sport', 'protocol.dport', 'source_internal',
                             'source_external', 'destination_internal', 'destination_external']
    for col in list_of_cols_straight:
        features[col] = list(unprocessed_df[col])

    # One-hot protocol indicators. Comparing directly (instead of indexing the
    # get_dummies result) yields an all-zero column when a protocol is absent
    # from the capture, rather than a KeyError (TCP) or a swallowed one (UDP).
    protocol = unprocessed_df['protocol']
    for protocol_name in ('TCP', 'UDP'):
        features[protocol_name] = list(protocol == protocol_name)

    # Packets per 10-second window, computed per row so the value stays
    # attached to its own packet even when the rows are not sorted by time,
    # and binned on epoch seconds so it does not depend on the local time zone.
    features['packet_flow'] = list(_packets_per_window(unprocessed_df['timestamp']))

    final_df = pd.DataFrame(features).astype('float64')

    # Nothing to scale (and StandardScaler rejects zero samples).
    if final_df.empty:
        return final_df

    # Not scaling the one-hot encoded / flag columns.
    columns_not_to_scale = {'TCP', 'UDP', 'source_internal', 'source_external',
                            'destination_internal', 'destination_external'}

    # Normalizing the data: zero mean, unit variance per remaining column.
    for col in final_df.columns:
        if col in columns_not_to_scale:
            continue
        scale = StandardScaler().fit(final_df[[col]])
        final_df[col] = scale.transform(final_df[[col]])

    return final_df

In [10]:
# Capture export to score. Held in a variable so the log line and the read
# below always refer to the same file.
path_to_csv = "./received_csv_files/wordpress1.csv"

# The previous message had the `f` inside the quotes, so it logged the literal
# text "f{path_to_csv}" instead of the path. Deferred %-formatting lets the
# logging module build the message only when the record is actually emitted.
logging.debug("Processing and running prediction for %s", path_to_csv)
unprocessed_df = pd.read_csv(path_to_csv)
final_df = process_df(unprocessed_df)

DEBUG:root:Processing and running prediction for f{path_to_csv}


In [11]:
# Display the processed feature frame (as the cell's last expression it is
# rendered by the notebook; pandas elides the middle rows of a long frame).
final_df

Unnamed: 0,octet_1_ip_src,octet_2_ip_src,octet_3_ip_src,octet_4_ip_src,octet_1_ip_dst,octet_2_ip_dst,octet_3_ip_dst,octet_4_ip_dst,length,protocol.sport,protocol.dport,source_internal,source_external,destination_internal,destination_external,TCP,UDP,packet_flow
0,-0.201429,0.112878,-0.196450,-0.488156,-0.253972,0.309807,-0.252835,-0.308945,-0.220816,-0.733448,1.110736,0.0,1.0,1.0,0.0,1.0,0.0,-2.310732
1,-0.201429,0.112878,-0.196450,-0.488156,-0.253972,0.309807,-0.252835,-0.308945,-0.195166,-0.733448,1.110736,0.0,1.0,1.0,0.0,1.0,0.0,-2.310732
2,-0.201429,0.250210,-0.196450,-0.246035,1.886493,-2.436405,1.092129,3.278894,-0.175590,1.284583,-0.729208,1.0,0.0,0.0,1.0,1.0,0.0,-2.310732
3,2.462882,-3.217427,1.689524,4.077560,-0.253972,0.309807,-0.252835,-0.308945,-0.230942,-0.790043,1.368575,0.0,1.0,1.0,0.0,1.0,0.0,-2.310732
4,2.462882,-3.217427,1.689524,4.077560,-0.253972,0.309807,-0.252835,-0.308945,-0.186391,-0.790043,1.368575,0.0,1.0,1.0,0.0,1.0,0.0,-2.310732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42855,-0.201429,0.250210,-0.196450,-0.557333,-0.253972,0.309807,-0.252835,-0.452458,-0.230942,0.931432,0.013682,1.0,0.0,1.0,0.0,1.0,0.0,-1.424277
42856,-0.201429,0.250210,-0.196450,-0.557333,-0.253972,0.309807,-0.252835,-0.452458,-0.230942,0.931432,0.013682,1.0,0.0,1.0,0.0,1.0,0.0,-1.424277
42857,-0.201429,0.250210,-0.196450,-0.557333,-0.253972,0.309807,-0.252835,-0.452458,-0.230942,0.931432,0.013682,1.0,0.0,1.0,0.0,1.0,0.0,-1.424277
42858,-0.201429,0.250210,-0.196450,-0.418978,-0.253972,0.309807,-0.252835,-0.567269,-0.226891,-0.055353,1.011482,1.0,0.0,1.0,0.0,1.0,0.0,-1.424277
