In [1]:
import os, platform, pprint, sys
# import fastai
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn


# seed value for the notebook (no visible cell consumes it yet)
seed: int = 14


# `pretty(obj)` pretty-prints obj with a 4-space indent, wrapped at 30 columns
pretty = pprint.PrettyPrinter(width=30, indent=4).pprint


# CSV datasets this notebook works on; dataPath is presumably where models
# get stored (it is not used by any visible cell -- TODO confirm)
file_path_1: str = '../../data/ddos/DDoS_vs_Benign_10000.csv'
file_path_2: str = '../../data/ddos/DDoS_vs_Benign_50000.csv'
dataPath   : str = './models'


# report the interpreter and library versions so a run can be reproduced;
# the report is assembled line by line and joined with newlines
print(
    '\n'.join([
        '',
        f'    python:\t{platform.python_version()}',
        '',
        f'    \tmatplotlib:\t{mpl.__version__}',
        f'    \tnumpy:\t\t{np.__version__}',
        f'    \tpandas:\t\t{pd.__version__}',
        f'    \tsklearn:\t{sklearn.__version__}',
        '    ',
    ])
)


    python:	3.7.10

    	matplotlib:	3.3.4
    	numpy:		1.20.3
    	pandas:		1.2.5
    	sklearn:	0.24.2
    


In [2]:
def load_data(filePath: str) -> pd.DataFrame:
    '''
        Loads the dataset at filePath and caches it as a pickle for quick access in the future.

        The cache file is '../../data/cache/<relative path>.pickle', resolved against the
        current working directory. When filePath starts with '../../data/', that prefix is
        removed so the cache mirrors the layout of the data directory; any other path is
        appended to the cache directory unchanged.

        Parameters:
            filePath: path to a .csv file readable by pandas.read_csv

        Returns:
            The dataset as a DataFrame, read from the cache when a cache file exists,
            otherwise parsed from the CSV (and then written to the cache).

        Notes:
            * The cache is never invalidated: if the CSV changes after it was cached,
              the stale pickle is still returned until the cache file is deleted.
            * pd.read_pickle can execute arbitrary code embedded in the file, so the
              cache directory must only contain files written by this function.
    '''

    dataRoot: str = '../../data/'
    cacheRoot: str = '../../data/cache/'

    # strip only the known data-root prefix (rather than blindly dropping the first
    # 11 characters of every path that begins with '..'); startswith is also safe
    # for empty or one-character paths, where indexing filePath[1] would raise
    if filePath.startswith(dataRoot):
        filePathClean: str = filePath[len(dataRoot):]
    else:
        filePathClean: str = filePath
    pickleDump: str = f'{cacheRoot}{filePathClean}.pickle'

    print(f'Loading Dataset: {filePath}')
    print(f'\tTo Dataset Cache: {pickleDump}\n')

    # check if data already exists within cache
    # if not, load data and cache it
    if os.path.exists(pickleDump):
        df = pd.read_pickle(pickleDump)
    else:
        df = pd.read_csv(filePath, low_memory=True)
        # the cache sub-directory may not exist on a fresh checkout; create it so
        # to_pickle does not fail with FileNotFoundError
        os.makedirs(os.path.dirname(pickleDump), exist_ok=True)
        df.to_pickle(pickleDump)

    return df

In [3]:
# load (or fetch from the pickle cache) the first dataset
df = load_data(filePath=file_path_1)

Loading Dataset: ../../data/ddos/DDoS_vs_Benign_10000.csv
	To Dataset Cache: ../../data/cache/ddos/DDoS_vs_Benign_10000.csv.pickle



In [4]:
# (rows, columns) of the loaded dataset
df.shape

(9992, 70)

In [5]:
# every column except the last one, taken from the full frame as a NumPy array
X = df.to_numpy()[:, :-1]

In [6]:
# last column only, kept two-dimensional (one column) by slicing with -1:
y = df.to_numpy()[:, -1:]

In [7]:
# display both arrays together as a tuple
(X, y)

(array([[6, 55043, 3, ..., 0.0, 0.0, 0],
        [17, 22530, 2, ..., 0.0, 0.0, 0],
        [17, 31817325, 24, ..., 28821270.0, 28821270.0, 1],
        ...,
        [6, 1, 2, ..., 0.0, 0.0, 1],
        [6, 1, 2, ..., 0.0, 0.0, 1],
        [6, 23736759, 6, ..., 12301020.0, 11435636.0, 1]], dtype=object),
 array([['BENIGN'],
        ['BENIGN'],
        ['BENIGN'],
        ...,
        ['DDOS'],
        ['DDOS'],
        ['DDOS']], dtype=object))

In [17]:
# two-element vectors used in the product checks below
I = np.array([0.3, 0.84])
vec = np.sqrt(np.array([2, 1.5]))

# 2x5 matrix and length-5 vector
W = np.array([
    [np.sqrt(2), 0.34,         0.12, 0.01, 0.41],
    [0.23,       np.sqrt(1.2), 0.4,  0.5,  np.sqrt(2.2)],
])
B = np.array([0.1, 0.2, 0.3, 0.11, 0.24])

# 0/1 vector and the 5x5 diagonal matrix that carries it on its diagonal
m0 = np.array([0, 0, 1, 1, 1])
M = np.diag(m0)

In [20]:
# matrix product of W with the diagonal matrix M
a = W @ M

In [22]:
# I times the W·M product, plus B times M
I @ a + B @ M

array([0.        , 0.        , 0.672     , 0.533     , 1.60892135])

In [23]:
# elementwise product of the two vectors (not a dot product)
np.multiply(I, vec)

array([0.42426407, 1.02878569])

In [27]:
# inner product of the two vectors
I.dot(vec)

1.4530497606808632

In [28]:
# broadcast m0 across each row of W (elementwise product)
np.multiply(m0, W)

array([[0.       , 0.       , 0.12     , 0.01     , 0.41     ],
       [0.       , 0.       , 0.4      , 0.5      , 1.4832397]])

In [29]:
# display the stored product of W and M
a

array([[0.       , 0.       , 0.12     , 0.01     , 0.41     ],
       [0.       , 0.       , 0.4      , 0.5      , 1.4832397]])

In [30]:
# elementwise product of m0 and B
np.multiply(m0, B)

array([0.  , 0.  , 0.3 , 0.11, 0.24])

In [31]:
# matrix product of B with the diagonal matrix M
B @ M

array([0.  , 0.  , 0.3 , 0.11, 0.24])

In [33]:
def f(x):
    '''
        Returns 1 / (1 + e**x), evaluated elementwise when x is an array.

        NOTE(review): the standard logistic sigmoid is 1 / (1 + e**(-x)); this
        function equals that sigmoid evaluated at -x. Confirm the sign is intended.
    '''
    denominator = 1 + np.exp(x)
    return 1 / denominator

In [34]:
# apply f to I times the W·M product, then add B times M (added after f is applied)
f(I @ a) + B @ M

array([0.5       , 0.5       , 0.70805784, 0.5057991 , 0.44279418])