In [268]:
import pickle
import torch
from torch import nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import util
import model
import dataset

In [269]:
with open('sec-bert.pkl', 'rb') as file:
    net = pickle.load(file)

In [270]:
batch_size, max_len = 512, 64
train_iter, vocab = util.load_csic_data(batch_size, max_len)

In [271]:
devices = util.try_all_gpus()

In [272]:
def get_bert_encoding(net, tokens_a, tokens_b=None):
    tokens, segments = dataset.get_tokens_and_segments(tokens_a, tokens_b)
    # print(f'debug tokens: {tokens}')
    token_ids = torch.tensor(vocab[tokens], device=devices[0]).unsqueeze(0)
    segments = torch.tensor(segments, device=devices[0]).unsqueeze(0)
    valid_len = torch.tensor(len(tokens), device=devices[0]).unsqueeze(0)
    encoded_X, _, _ = net(token_ids, segments, valid_len)
    return encoded_X

In [273]:
tokens_a = ['id 3', 'nombre vino+rioja', 'precio 100', 'cantidad 55', 'b1 a%f1adir+al+carrito']
encoded_text = get_bert_encoding(net, tokens_a)
encoded_text_cls = encoded_text[:, 0, :]
encoded_text_crane = encoded_text[:, 2, :]
encoded_text.shape, encoded_text_cls.shape, encoded_text_crane[0][:3]

(torch.Size([1, 7, 256]),
 torch.Size([1, 256]),
 tensor([-0.0820, -0.1346, -2.5972], device='cuda:0', grad_fn=<SliceBackward0>))

In [274]:
encoded_text

tensor([[[-2.3438e+00, -1.4685e-01, -1.0563e-01,  ...,  3.8171e-01,
           1.0301e-01, -1.4891e-01],
         [-2.3906e+00, -1.6602e-01, -2.6789e+00,  ...,  3.5109e-01,
           9.9047e-01, -1.5434e-03],
         [-8.2047e-02, -1.3463e-01, -2.5972e+00,  ...,  3.8552e-01,
           1.0387e+00, -1.3291e-01],
         ...,
         [-2.2967e+00, -1.3559e-01, -2.5641e+00,  ...,  1.0996e-02,
           9.6580e-01, -1.4627e-01],
         [-2.3997e+00, -1.6335e-01, -2.6900e+00,  ...,  3.6695e-01,
           1.0173e+00, -1.6488e-01],
         [-2.3338e+00,  3.1014e-02, -8.2881e-02,  ...,  4.0236e-01,
           1.0504e+00,  3.9550e-02]]], device='cuda:0',
       grad_fn=<NativeLayerNormBackward0>)

In [275]:
encoded_text[:, 0, :]

tensor([[-2.3438e+00, -1.4685e-01, -1.0563e-01,  2.1841e+00, -4.0009e-01,
         -5.1557e-01,  7.4709e-01,  1.4610e+00,  3.0104e-01, -7.3642e-01,
          2.4291e-01,  1.7185e-01,  2.7260e+00, -1.3006e-01, -3.3634e-01,
          6.7000e-01,  1.0143e-01, -3.1286e-02, -1.8552e+00,  2.6882e-01,
         -8.8442e-01,  2.6239e-01,  4.9982e-04, -3.0157e+00,  1.9896e-01,
          8.2283e-03,  9.8394e-02, -7.7912e-02,  3.5997e-02,  1.0916e+00,
         -1.9430e-02, -6.6414e-01,  5.7248e-02, -1.8587e+00, -4.5302e-03,
         -1.3188e+00,  9.5264e-02,  3.2527e-01, -1.6518e+00, -4.9450e-02,
         -1.6034e-01,  2.4550e-01,  2.9721e-02,  2.7296e-01,  9.9985e-01,
         -3.8937e-01, -3.9080e-01, -2.4112e+00, -1.3981e+00, -6.4957e-01,
         -2.2315e+00, -5.8994e-01,  2.1221e+00, -1.2547e+00, -8.1408e-01,
          2.2368e+00, -1.9008e+00, -1.2372e+00, -2.0483e+00, -2.1654e-01,
          4.8634e-01,  1.6792e+00, -1.4106e-01, -1.7767e+00, -1.9272e+00,
         -1.3519e+00,  2.4282e-02, -5.

In [276]:
len(encoded_text[:, 0, :][0])

256

In [277]:
t = encoded_text[:, 0, :].to('cpu')
t

tensor([[-2.3438e+00, -1.4685e-01, -1.0563e-01,  2.1841e+00, -4.0009e-01,
         -5.1557e-01,  7.4709e-01,  1.4610e+00,  3.0104e-01, -7.3642e-01,
          2.4291e-01,  1.7185e-01,  2.7260e+00, -1.3006e-01, -3.3634e-01,
          6.7000e-01,  1.0143e-01, -3.1286e-02, -1.8552e+00,  2.6882e-01,
         -8.8442e-01,  2.6239e-01,  4.9982e-04, -3.0157e+00,  1.9896e-01,
          8.2283e-03,  9.8394e-02, -7.7912e-02,  3.5997e-02,  1.0916e+00,
         -1.9430e-02, -6.6414e-01,  5.7248e-02, -1.8587e+00, -4.5302e-03,
         -1.3188e+00,  9.5264e-02,  3.2527e-01, -1.6518e+00, -4.9450e-02,
         -1.6034e-01,  2.4550e-01,  2.9721e-02,  2.7296e-01,  9.9985e-01,
         -3.8937e-01, -3.9080e-01, -2.4112e+00, -1.3981e+00, -6.4957e-01,
         -2.2315e+00, -5.8994e-01,  2.1221e+00, -1.2547e+00, -8.1408e-01,
          2.2368e+00, -1.9008e+00, -1.2372e+00, -2.0483e+00, -2.1654e-01,
          4.8634e-01,  1.6792e+00, -1.4106e-01, -1.7767e+00, -1.9272e+00,
         -1.3519e+00,  2.4282e-02, -5.

In [278]:
t.detach().numpy()

array([[-2.34380722e+00, -1.46851286e-01, -1.05634078e-01,
         2.18410015e+00, -4.00088310e-01, -5.15574455e-01,
         7.47092426e-01,  1.46104312e+00,  3.01035166e-01,
        -7.36419380e-01,  2.42912337e-01,  1.71847701e-01,
         2.72603345e+00, -1.30058169e-01, -3.36336970e-01,
         6.69996023e-01,  1.01433180e-01, -3.12864296e-02,
        -1.85521317e+00,  2.68820018e-01, -8.84423614e-01,
         2.62392670e-01,  4.99817485e-04, -3.01568007e+00,
         1.98962480e-01,  8.22834857e-03,  9.83939916e-02,
        -7.79117122e-02,  3.59972827e-02,  1.09164321e+00,
        -1.94299091e-02, -6.64142609e-01,  5.72479591e-02,
        -1.85865283e+00, -4.53018816e-03, -1.31881690e+00,
         9.52636600e-02,  3.25267166e-01, -1.65178823e+00,
        -4.94499728e-02, -1.60341740e-01,  2.45495528e-01,
         2.97210310e-02,  2.72962600e-01,  9.99847949e-01,
        -3.89367908e-01, -3.90804738e-01, -2.41124344e+00,
        -1.39808655e+00, -6.49568439e-01, -2.23148870e+0

In [279]:
torch.mean(encoded_text[:, 1, :])

tensor(-0.0014, device='cuda:0', grad_fn=<MeanBackward0>)

In [280]:
torch.mean(encoded_text[:, 1, :]).item()

-0.0014452189207077026

In [281]:
for item in encoded_text[0][1:-1]:
    print(torch.mean(item).item())

-0.0014452189207077026
-0.0017533078789710999
-4.1961669921875e-05
0.0005651339888572693
-0.0006000399589538574


In [335]:
df = pd.read_csv('../data/csic_database.csv')
df

Unnamed: 0.1,Unnamed: 0,Method,User-Agent,Pragma,Cache-Control,Accept,Accept-encoding,Accept-charset,language,host,cookie,content-type,connection,lenght,content,classification,URL
0,Normal,GET,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=1F767F17239C9B670A39E9B10C3825F4,,close,,,0,http://localhost:8080/tienda1/index.jsp HTTP/1.1
1,Normal,GET,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=81761ACA043B0E6014CA42A4BCD06AB5,,close,,,0,http://localhost:8080/tienda1/publico/anadir.j...
2,Normal,POST,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=933185092E0B668B90676E0A2B0767AF,application/x-www-form-urlencoded,Connection: close,Content-Length: 68,id=3&nombre=Vino+Rioja&precio=100&cantidad=55&...,0,http://localhost:8080/tienda1/publico/anadir.j...
3,Normal,GET,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=8FA18BA82C5336D03D3A8AFA3E68CBB0,,close,,,0,http://localhost:8080/tienda1/publico/autentic...
4,Normal,POST,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=7104E6C68A6BCF1423DAE990CE49FEE2,application/x-www-form-urlencoded,Connection: close,Content-Length: 63,modo=entrar&login=choong&pwd=d1se3ci%F3n&remem...,0,http://localhost:8080/tienda1/publico/autentic...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61060,Anomalous,GET,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=62216ADBBD9B91E17CA9AFEDCCC36275,,close,,,1,http://localhost:8080/tienda1/miembros/editar....
61061,Anomalous,POST,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=2317F5C0B1B7FAD18EB425E98DB102A3,application/x-www-form-urlencoded,Connection: close,Content-Length: 255,modo=registro&login=beaumont&password=quEratIt...,1,http://localhost:8080/tienda1/miembros/editar....
61062,Anomalous,GET,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=7F0BA54A88B849EF752006D388E15CDD,,close,,,1,http://localhost:8080/tienda1/miembros/editar....
61063,Anomalous,GET,Mozilla/5.0 (compatible; Konqueror/3.5; Linux)...,no-cache,no-cache,"text/xml,application/xml,application/xhtml+xml...","x-gzip, x-deflate, gzip, deflate","utf-8, utf-8;q=0.5, *;q=0.5",en,localhost:8080,JSESSIONID=FB018FFB06011CFABD60D8E8AD58CA21,,close,,,1,http://localhost:8080/tienda1/imagenes/3.gif/ ...


In [283]:
# drop rows with missing content
df.dropna(subset=['content'], inplace=True)

In [284]:
payload_df = df.iloc[:,[0, 14]]

In [285]:
payload_df.columns = ['Type', 'Content']

In [286]:
payload_df.iloc[:10,:]

Unnamed: 0,Type,Content
2,Normal,id=3&nombre=Vino+Rioja&precio=100&cantidad=55&...
4,Normal,modo=entrar&login=choong&pwd=d1se3ci%F3n&remem...
6,Normal,id=2
9,Normal,errorMsg=Credenciales+incorrectas
12,Normal,modo=insertar&precio=2672&B1=Pasar+por+caja
15,Normal,modo=registro&login=cen&password=40a5E&nombre=...
17,Normal,B2=Vaciar+carrito
27,Normal,modo=registro&login=de_la&password=roder%F3n&n...
38,Normal,id=3&nombre=Queso+Manchego&precio=100&cantidad...
40,Normal,modo=entrar&login=magrath&pwd=1an0re%E15354&re...


In [288]:
payloads = []
downstream_data = {
    "Type": [],
    "Payload": [],
    "Vector": [],
}
max_vec_len = 0

# for row in payload_df.itertuples(index=False):
# for row in payload_df.iloc[:2,:].itertuples(index=False):
for row in payload_df.itertuples(index=False):
    payload_type = row[0]
    payload = row[1]
    # print(payload)
    if len(payload.split('&')) >= 2:
        query_params = row[1].strip().lower().split('&')
        params_store = []
        for query_param in query_params:
            parts = query_param.split('=')
            # print(parts)
            # print(' '.join(parts))
            params_store.append(' '.join(parts))
            # payloads.append(parts)
        # print(params_store)
        encoded_text = get_bert_encoding(net, params_store)
        vector = encoded_text[:, 0, :][0].to('cpu').detach().numpy()
        downstream_data["Type"].append(payload_type)
        downstream_data["Payload"].append(payload)
        downstream_data["Vector"].append(vector)

        payloads.append(params_store)

In [289]:
master_df = pd.DataFrame(downstream_data)

In [290]:
master_df

Unnamed: 0,Type,Payload,Vector
0,Normal,id=3&nombre=Vino+Rioja&precio=100&cantidad=55&...,"[-2.3659832, -0.15886183, -0.10971304, 2.20664..."
1,Normal,modo=entrar&login=choong&pwd=d1se3ci%F3n&remem...,"[-2.3750374, -0.1567462, -0.116095155, 2.18917..."
2,Normal,modo=insertar&precio=2672&B1=Pasar+por+caja,"[-2.3900309, -0.14269774, -2.6847346, 0.132596..."
3,Normal,modo=registro&login=cen&password=40a5E&nombre=...,"[-2.3330877, 0.02016984, -2.6327085, 2.185007,..."
4,Normal,modo=registro&login=de_la&password=roder%F3n&n...,"[-0.069287404, 0.031637285, -2.762777, 2.33928..."
...,...,...,...
12066,Anomalous,modo=registro&login=hogan&password=cha377&nomb...,"[-2.3067095, 0.035235427, -0.0767258, 2.189000..."
12067,Anomalous,modo=registro&login=beaumont&password=quEratIt...,"[-2.3803675, -0.18270046, -2.6728156, 2.123513..."
12068,Anomalous,modo=registro&login=beaumont&password=quEratIt...,"[-2.3629584, -0.16883184, -2.657836, 0.0550561..."
12069,Anomalous,modo=registro&login=%2Bmel%2Fhem&password=quEr...,"[-0.10414417, 0.0034454542, -2.6000686, 2.1151..."


In [291]:
# max_vec_len

In [292]:
# def expand_space(row):
#     if len(row['Vector']) < max_vec_len:
#         padding_size = max_vec_len - len(row['Vector'])
#         row['Vector'].extend([0] * padding_size)
#     return row

In [293]:
# master_df.apply(expand_space, axis=1)

In [294]:
# Function to split list into multiple columns
def split_list(row):
    return pd.Series(row['Vector'])

In [295]:
# Apply the function and concatenate the result with the original DataFrame
expanded_master_df = pd.concat([master_df, master_df.apply(split_list, axis=1)], axis=1)
expanded_master_df

Unnamed: 0,Type,Payload,Vector,0,1,2,3,4,5,6,...,246,247,248,249,250,251,252,253,254,255
0,Normal,id=3&nombre=Vino+Rioja&precio=100&cantidad=55&...,"[-2.3659832, -0.15886183, -0.10971304, 2.20664...",-2.365983,-0.158862,-0.109713,2.206648,-0.421443,-0.542849,0.770784,...,0.776644,0.169099,0.025687,0.115904,2.268859,0.023984,-0.472291,0.015941,1.021271,0.017026
1,Normal,modo=entrar&login=choong&pwd=d1se3ci%F3n&remem...,"[-2.3750374, -0.1567462, -0.116095155, 2.18917...",-2.375037,-0.156746,-0.116095,2.189175,-0.423396,-0.004092,0.753390,...,0.761127,0.146021,0.681185,0.000034,2.261351,0.674759,-0.464666,0.384524,1.023708,0.011762
2,Normal,modo=insertar&precio=2672&B1=Pasar+por+caja,"[-2.3900309, -0.14269774, -2.6847346, 0.132596...",-2.390031,-0.142698,-2.684735,0.132596,-0.006019,0.005649,0.781912,...,0.791758,0.032377,0.040865,0.128093,2.325550,0.039641,-0.014829,0.032775,0.117252,-0.143242
3,Normal,modo=registro&login=cen&password=40a5E&nombre=...,"[-2.3330877, 0.02016984, -2.6327085, 2.185007,...",-2.333088,0.020170,-2.632709,2.185007,-0.373667,-0.503376,0.722580,...,0.036034,0.141645,0.028458,0.084605,2.246880,0.682007,-0.428648,0.366694,0.100196,-0.137929
4,Normal,modo=registro&login=de_la&password=roder%F3n&n...,"[-0.069287404, 0.031637285, -2.762777, 2.33928...",-0.069287,0.031637,-2.762777,2.339288,-0.379208,-0.483655,0.773675,...,0.798775,0.159801,0.699716,0.098885,0.154737,0.048152,-0.435125,0.028756,1.091002,0.040708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12066,Anomalous,modo=registro&login=hogan&password=cha377&nomb...,"[-2.3067095, 0.035235427, -0.0767258, 2.189000...",-2.306710,0.035235,-0.076726,2.189000,-0.373980,0.016207,0.032985,...,0.071023,0.178358,0.694058,0.122825,0.155877,0.708707,-0.421957,0.401749,0.128220,0.050005
12067,Anomalous,modo=registro&login=beaumont&password=quEratIt...,"[-2.3803675, -0.18270046, -2.6728156, 2.123513...",-2.380368,-0.182700,-2.672816,2.123514,-0.445355,-0.560549,0.708134,...,0.709986,0.123321,0.638201,0.070112,2.192156,-0.017564,-0.086028,0.347006,0.966882,-0.195514
12068,Anomalous,modo=registro&login=beaumont&password=quEratIt...,"[-2.3629584, -0.16883184, -2.657836, 0.0550561...",-2.362958,-0.168832,-2.657836,0.055056,-0.416655,-0.528649,0.669992,...,0.685859,0.091949,0.596267,0.025004,2.173910,0.638776,-0.472775,-0.023213,0.052311,-0.172716
12069,Anomalous,modo=registro&login=%2Bmel%2Fhem&password=quEr...,"[-0.10414417, 0.0034454542, -2.6000686, 2.1151...",-0.104144,0.003445,-2.600069,2.115175,-0.442487,-0.566286,-0.000815,...,0.754437,0.184584,0.708390,-0.000677,2.188406,0.646525,-0.051530,0.411364,0.973245,-0.203432


In [296]:
# Drop the original 'Vector' column if desired
expanded_master_df.drop(columns=['Vector'], inplace=True)

In [297]:
expanded_master_df

Unnamed: 0,Type,Payload,0,1,2,3,4,5,6,7,...,246,247,248,249,250,251,252,253,254,255
0,Normal,id=3&nombre=Vino+Rioja&precio=100&cantidad=55&...,-2.365983,-0.158862,-0.109713,2.206648,-0.421443,-0.542849,0.770784,1.482622,...,0.776644,0.169099,0.025687,0.115904,2.268859,0.023984,-0.472291,0.015941,1.021271,0.017026
1,Normal,modo=entrar&login=choong&pwd=d1se3ci%F3n&remem...,-2.375037,-0.156746,-0.116095,2.189175,-0.423396,-0.004092,0.753390,0.058294,...,0.761127,0.146021,0.681185,0.000034,2.261351,0.674759,-0.464666,0.384524,1.023708,0.011762
2,Normal,modo=insertar&precio=2672&B1=Pasar+por+caja,-2.390031,-0.142698,-2.684735,0.132596,-0.006019,0.005649,0.781912,1.512309,...,0.791758,0.032377,0.040865,0.128093,2.325550,0.039641,-0.014829,0.032775,0.117252,-0.143242
3,Normal,modo=registro&login=cen&password=40a5E&nombre=...,-2.333088,0.020170,-2.632709,2.185007,-0.373667,-0.503376,0.722580,1.457505,...,0.036034,0.141645,0.028458,0.084605,2.246880,0.682007,-0.428648,0.366694,0.100196,-0.137929
4,Normal,modo=registro&login=de_la&password=roder%F3n&n...,-0.069287,0.031637,-2.762777,2.339288,-0.379208,-0.483655,0.773675,1.570764,...,0.798775,0.159801,0.699716,0.098885,0.154737,0.048152,-0.435125,0.028756,1.091002,0.040708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12066,Anomalous,modo=registro&login=hogan&password=cha377&nomb...,-2.306710,0.035235,-0.076726,2.189000,-0.373980,0.016207,0.032985,1.483679,...,0.071023,0.178358,0.694058,0.122825,0.155877,0.708707,-0.421957,0.401749,0.128220,0.050005
12067,Anomalous,modo=registro&login=beaumont&password=quEratIt...,-2.380368,-0.182700,-2.672816,2.123514,-0.445355,-0.560549,0.708134,1.410471,...,0.709986,0.123321,0.638201,0.070112,2.192156,-0.017564,-0.086028,0.347006,0.966882,-0.195514
12068,Anomalous,modo=registro&login=beaumont&password=quEratIt...,-2.362958,-0.168832,-2.657836,0.055056,-0.416655,-0.528649,0.669992,1.403081,...,0.685859,0.091949,0.596267,0.025004,2.173910,0.638776,-0.472775,-0.023213,0.052311,-0.172716
12069,Anomalous,modo=registro&login=%2Bmel%2Fhem&password=quEr...,-0.104144,0.003445,-2.600069,2.115175,-0.442487,-0.566286,-0.000815,1.434150,...,0.754437,0.184584,0.708390,-0.000677,2.188406,0.646525,-0.051530,0.411364,0.973245,-0.203432


In [298]:
# Rename the new columns dynamically
num_columns = len(expanded_master_df.columns) - 2  # first 2-cols
new_column_names = {i: f'Vector_{i+1}' for i in range(num_columns)}
expanded_master_df = expanded_master_df.rename(columns=new_column_names)

In [299]:
expanded_master_df

Unnamed: 0,Type,Payload,Vector_1,Vector_2,Vector_3,Vector_4,Vector_5,Vector_6,Vector_7,Vector_8,...,Vector_247,Vector_248,Vector_249,Vector_250,Vector_251,Vector_252,Vector_253,Vector_254,Vector_255,Vector_256
0,Normal,id=3&nombre=Vino+Rioja&precio=100&cantidad=55&...,-2.365983,-0.158862,-0.109713,2.206648,-0.421443,-0.542849,0.770784,1.482622,...,0.776644,0.169099,0.025687,0.115904,2.268859,0.023984,-0.472291,0.015941,1.021271,0.017026
1,Normal,modo=entrar&login=choong&pwd=d1se3ci%F3n&remem...,-2.375037,-0.156746,-0.116095,2.189175,-0.423396,-0.004092,0.753390,0.058294,...,0.761127,0.146021,0.681185,0.000034,2.261351,0.674759,-0.464666,0.384524,1.023708,0.011762
2,Normal,modo=insertar&precio=2672&B1=Pasar+por+caja,-2.390031,-0.142698,-2.684735,0.132596,-0.006019,0.005649,0.781912,1.512309,...,0.791758,0.032377,0.040865,0.128093,2.325550,0.039641,-0.014829,0.032775,0.117252,-0.143242
3,Normal,modo=registro&login=cen&password=40a5E&nombre=...,-2.333088,0.020170,-2.632709,2.185007,-0.373667,-0.503376,0.722580,1.457505,...,0.036034,0.141645,0.028458,0.084605,2.246880,0.682007,-0.428648,0.366694,0.100196,-0.137929
4,Normal,modo=registro&login=de_la&password=roder%F3n&n...,-0.069287,0.031637,-2.762777,2.339288,-0.379208,-0.483655,0.773675,1.570764,...,0.798775,0.159801,0.699716,0.098885,0.154737,0.048152,-0.435125,0.028756,1.091002,0.040708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12066,Anomalous,modo=registro&login=hogan&password=cha377&nomb...,-2.306710,0.035235,-0.076726,2.189000,-0.373980,0.016207,0.032985,1.483679,...,0.071023,0.178358,0.694058,0.122825,0.155877,0.708707,-0.421957,0.401749,0.128220,0.050005
12067,Anomalous,modo=registro&login=beaumont&password=quEratIt...,-2.380368,-0.182700,-2.672816,2.123514,-0.445355,-0.560549,0.708134,1.410471,...,0.709986,0.123321,0.638201,0.070112,2.192156,-0.017564,-0.086028,0.347006,0.966882,-0.195514
12068,Anomalous,modo=registro&login=beaumont&password=quEratIt...,-2.362958,-0.168832,-2.657836,0.055056,-0.416655,-0.528649,0.669992,1.403081,...,0.685859,0.091949,0.596267,0.025004,2.173910,0.638776,-0.472775,-0.023213,0.052311,-0.172716
12069,Anomalous,modo=registro&login=%2Bmel%2Fhem&password=quEr...,-0.104144,0.003445,-2.600069,2.115175,-0.442487,-0.566286,-0.000815,1.434150,...,0.754437,0.184584,0.708390,-0.000677,2.188406,0.646525,-0.051530,0.411364,0.973245,-0.203432


In [300]:
# !pip install scikit-learn

In [301]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [302]:
# Perform label encoding on the categorical label
label_encoder = LabelEncoder()
expanded_master_df['Type'] = label_encoder.fit_transform(expanded_master_df['Type'])

In [316]:
# Define features and target
X = expanded_master_df[['Vector_1', 'Vector_2', 'Vector_3', 'Vector_4', 'Vector_5', 'Vector_6', 'Vector_7', 'Vector_8', 'Vector_9', 'Vector_10', 'Vector_11', 'Vector_12', 'Vector_13']]
y = expanded_master_df['Type']

In [317]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [305]:
X_train.values

array([[-2.4498003 , -0.17574479, -2.7454634 , ...,  0.19913237,
         0.15231791,  2.7542021 ],
       [-2.302913  , -0.1247528 , -2.5869584 , ...,  0.27486667,
         0.2000956 ,  0.1580506 ],
       [-2.3534093 , -0.19604552, -2.6408    , ...,  0.19437325,
        -0.01374539,  2.6184216 ],
       ...,
       [-0.12319289, -0.18323542, -0.12114131, ...,  0.23958555,
         0.13140878,  0.10547628],
       [-2.4158928 , -0.16790693, -0.11940751, ...,  0.2503307 ,
         0.15714413,  0.12808241],
       [-2.43924   ,  0.00570276, -2.7254655 , ...,  0.25425568,
         0.17632647,  2.817519  ]], dtype=float32)

In [306]:
# Create a Random Forest Classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

In [307]:
# Train the classifier
clf.fit(X_train, y_train)

In [308]:
# Make predictions on the test set
y_pred = clf.predict(X_test)

In [309]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.5453416149068323

In [310]:
# Calculate F1 score
f1 = f1_score(y_test, y_pred, average='weighted')
print(f'F1 Score: {f1}')

F1 Score: 0.484221212176086


In [311]:
# payloads

In [312]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [313]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [314]:
# Load Iris dataset
# iris = load_iris()
# X, y = iris.data, iris.target

In [315]:
# Standardize the features
# scaler = StandardScaler()
# X = scaler.fit_transform(X)

In [318]:
# Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [334]:
X_train

tensor([[-2.4498, -0.1757, -2.7455,  ...,  0.1991,  0.1523,  2.7542],
        [-2.3029, -0.1248, -2.5870,  ...,  0.2749,  0.2001,  0.1581],
        [-2.3534, -0.1960, -2.6408,  ...,  0.1944, -0.0137,  2.6184],
        ...,
        [-0.1232, -0.1832, -0.1211,  ...,  0.2396,  0.1314,  0.1055],
        [-2.4159, -0.1679, -0.1194,  ...,  0.2503,  0.1571,  0.1281],
        [-2.4392,  0.0057, -2.7255,  ...,  0.2543,  0.1763,  2.8175]],
       device='cuda:0')

In [225]:
# Convert data to PyTorch tensors and move to GPU
# X_train = torch.FloatTensor(X_train).to(device)
# y_train = torch.LongTensor(y_train).to(device)
# X_test = torch.FloatTensor(X_test).to(device)
# y_test = torch.LongTensor(y_test).to(device)

In [320]:
# Convert data to PyTorch tensors and move to GPU
X_train = torch.FloatTensor(X_train.values).to(device)
y_train = torch.LongTensor(y_train.values).to(device)
X_test = torch.FloatTensor(X_test.values).to(device)
y_test = torch.LongTensor(y_test.values).to(device)

In [321]:
# Define the MLP model
# class MLP(nn.Module):
#     def __init__(self, input_size, hidden_size, output_size):
#         super(MLP, self).__init__()
#         self.fc1 = nn.Linear(input_size, hidden_size)
#         self.relu = nn.ReLU()
#         self.fc2 = nn.Linear(hidden_size, output_size)
    
#     def forward(self, x):
#         x = self.fc1(x)
#         x = self.relu(x)
#         x = self.fc2(x)
#         return x

In [322]:
# Model
# Define a dynamic MLP model and move to GPU
class DynamicMLP(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(DynamicMLP, self).__init__()
        layers = []
        for i in range(len(hidden_sizes)):
            if i == 0:
                layers.append(nn.Linear(input_size, hidden_sizes[i]))
            else:
                layers.append(nn.Linear(hidden_sizes[i-1], hidden_sizes[i]))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.mlp = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.mlp(x)

In [323]:
# Define the sizes of hidden layers
hidden_sizes = [500, 400, 300, 50, 32]

In [329]:
# Instantiate the dynamic model and move to GPU
input_size = X_train.shape[1]
output_size = len(set(y))
model = DynamicMLP(input_size, hidden_sizes, output_size).to(device)

In [330]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [331]:
# Training the model
num_epochs = 5000

In [332]:
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print progress
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [100/5000], Loss: 0.6706
Epoch [200/5000], Loss: 0.6641
Epoch [300/5000], Loss: 0.6463
Epoch [400/5000], Loss: 0.6331
Epoch [500/5000], Loss: 0.6127
Epoch [600/5000], Loss: 0.5912
Epoch [700/5000], Loss: 0.5804
Epoch [800/5000], Loss: 0.5675
Epoch [900/5000], Loss: 0.5320
Epoch [1000/5000], Loss: 0.6106
Epoch [1100/5000], Loss: 0.4808
Epoch [1200/5000], Loss: 0.4495
Epoch [1300/5000], Loss: 0.4275
Epoch [1400/5000], Loss: 0.4173
Epoch [1500/5000], Loss: 0.3951
Epoch [1600/5000], Loss: 0.3845
Epoch [1700/5000], Loss: 0.3608
Epoch [1800/5000], Loss: 0.4860
Epoch [1900/5000], Loss: 0.3396
Epoch [2000/5000], Loss: 0.3214
Epoch [2100/5000], Loss: 0.3017
Epoch [2200/5000], Loss: 0.3666
Epoch [2300/5000], Loss: 0.3129
Epoch [2400/5000], Loss: 0.3026
Epoch [2500/5000], Loss: 0.2825
Epoch [2600/5000], Loss: 0.2703
Epoch [2700/5000], Loss: 0.2597
Epoch [2800/5000], Loss: 0.2945
Epoch [2900/5000], Loss: 0.2592
Epoch [3000/5000], Loss: 0.2605
Epoch [3100/5000], Loss: 0.2476
Epoch [3200/5000]

In [333]:
# Evaluate the model on the test set
with torch.no_grad():
    model.eval()
    outputs = model(X_test)
    _, predicted = torch.max(outputs, 1)
    accuracy = (predicted == y_test).sum().item() / y_test.size(0)
    print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.5383
