In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import joblib

In [2]:
import pandas as pd
df = pd.read_csv("/Users/gokul/Desktop/Attack_Identification/Threat_Identification/module3.csv",encoding='latin-1')
df_pay = df["injection_type"]
df.drop(["payload"],axis=1,inplace=True)
df.drop(["injection_type"],axis=1,inplace=True)
X = df.values
y = df_pay.values

In [3]:
X

array([[ 9,  0,  0, ...,  0,  0,  0],
       [12,  0,  0, ...,  0,  0,  0],
       [38,  0,  2, ...,  0,  1,  0],
       ...,
       [ 7,  0,  0, ...,  0,  0,  0],
       [ 8,  0,  0, ...,  0,  0,  0],
       [ 8,  0,  0, ...,  0,  0,  0]], dtype=int64)

In [4]:
from sklearn.model_selection import train_test_split
X_train , X_test , y_train , y_test = train_test_split(X ,y , test_size=0.2,random_state= 2) 
X_train = torch.Tensor(X_train)
X_test = torch.Tensor(X_test)
y_train = torch.Tensor(y_train)
y_test = torch.Tensor(y_test)
y_train = y_train.type(torch.LongTensor)
y_test = y_test.type(torch.LongTensor)

In [5]:
class BayesianNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(BayesianNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [6]:
def train(model, optimizer, criterion, train_loader):
    model.train()
    train_loss = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    return train_loss / len(train_loader)

In [8]:
def evaluate(model, criterion, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            test_loss += criterion(outputs, labels).item()
            pred = outputs.argmax(dim=1, keepdim=True)
            correct += pred.eq(labels.view_as(pred)).sum().item()
    return test_loss / len(test_loader), correct / len(test_loader.dataset)

In [9]:
input_size = 9
hidden_size = 1024
output_size = 4
num_epochs = 15
batch_size = 512
learning_rate = 0.01

In [10]:
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [11]:
model = BayesianNN(input_size, hidden_size, output_size)

In [12]:
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [13]:
for epoch in range(num_epochs):
    train_loss = train(model, optimizer, criterion, train_loader)
    test_loss, test_accuracy = evaluate(model, criterion, test_loader)
    print(f"Epoch {epoch+1} - Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

Epoch 1 - Train Loss: 0.9019, Test Loss: 0.1211, Test Accuracy: 0.9739
Epoch 2 - Train Loss: 0.1772, Test Loss: 0.0957, Test Accuracy: 0.9740
Epoch 3 - Train Loss: 0.1039, Test Loss: 0.0819, Test Accuracy: 0.9763
Epoch 4 - Train Loss: 0.1121, Test Loss: 0.0886, Test Accuracy: 0.9752
Epoch 5 - Train Loss: 0.1269, Test Loss: 0.0692, Test Accuracy: 0.9786
Epoch 6 - Train Loss: 0.0971, Test Loss: 0.1309, Test Accuracy: 0.9715
Epoch 7 - Train Loss: 0.1114, Test Loss: 0.0846, Test Accuracy: 0.9764
Epoch 8 - Train Loss: 0.0941, Test Loss: 0.0753, Test Accuracy: 0.9768
Epoch 9 - Train Loss: 0.0743, Test Loss: 0.0714, Test Accuracy: 0.9782
Epoch 10 - Train Loss: 0.0707, Test Loss: 0.0718, Test Accuracy: 0.9783
Epoch 11 - Train Loss: 0.0715, Test Loss: 0.0826, Test Accuracy: 0.9769
Epoch 12 - Train Loss: 0.0822, Test Loss: 0.0986, Test Accuracy: 0.9744
Epoch 13 - Train Loss: 0.0808, Test Loss: 0.0723, Test Accuracy: 0.9786
Epoch 14 - Train Loss: 0.0641, Test Loss: 0.0689, Test Accuracy: 0.9798
E

In [14]:
file_path = "C:/Users/gokul/Desktop/Attack_Identification/Threat_Identification"

def dump(model , filename):
    with open(filename, "wb") as f:
        joblib.dump(model , f)

In [15]:
dump(model , filename=f"{file_path}/bnn.pkl")

In [16]:
import sys 
sys.path.append('/Users/gokul/Desktop/Attack_Identification')
sys.path

from WAF_Model_Trainer.utils import extract_feature

In [17]:
from WAF_Model_Trainer.utils import extract_feature
payload_n = "&& perl -e 'print ""X""x4096'"
d = extract_feature(payload_n)
d.drop(columns=d.columns[0], axis=1,  inplace=True)
im_arr = np.array(d.values)
im_arr = im_arr.flatten()
im_arr

array([25,  0,  5,  5,  0,  0,  0,  0,  4], dtype=int64)

In [18]:
def predict(model, data):
    model.eval()
    with torch.no_grad():
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
    return predicted

In [19]:
file_path ="C:/Users/gokul/Desktop/Attack_Identification/Threat_Identification/bnn.pkl"

In [20]:
with open(file_path , "rb") as g:
    bnn_model = joblib.load(g)
        

In [21]:
bnn_model

BayesianNN(
  (fc1): Linear(in_features=9, out_features=1024, bias=True)
  (fc2): Linear(in_features=1024, out_features=4, bias=True)
)

In [22]:
new_data = torch.Tensor(im_arr)
#bnn_model = joblib.load("c:/Users/gokul/Desktop/Attack_Identification/Threat_Identification/bnn.pkl")

predicted_class = predict(bnn_model, new_data.unsqueeze(0))
if(predicted_class.item() == 0) :
    attack_type = "NORMAL"
elif(predicted_class.item() == 1) :
    attack_type = "COMMAND INJECTION"
elif(predicted_class.item() == 2) :
    attack_type = "SQL INJECTION"
else:
    attack_type = "CROSS-SITE SCRIPTING (XSS)"

print(attack_type) 

COMMAND INJECTION


In [22]:
df_attacks = pd.read_csv("/Users/gokul/Desktop/Attack_Identification/Logged_Output/waf_payloads.csv")
fin_arr = []

In [23]:
for index, row in df_attacks.iterrows():
    current_row = []
    current_payload = row['Malicious Payload']
    current_row.append(current_payload)
    d = extract_feature(current_payload)
    d.drop(columns=d.columns[0], axis=1,  inplace=True)
    im_arr = np.array(d.values)
    im_arr = im_arr.flatten()
    current_data = torch.Tensor(im_arr)
    predicted_class = predict(bnn_model, current_data.unsqueeze(0))
    if(predicted_class.item() == 0) :
        attack_type = "NORMAL"
    elif(predicted_class.item() == 1) :
        attack_type = "COMMAND INJECTION"
    elif(predicted_class.item() == 2) :
        attack_type = "SQL INJECTION"
    else:
        attack_type = "CROSS-SITE SCRIPTING (XSS)"
    current_row.append(attack_type)
    fin_arr.append(current_row)
    
fin_arr = np.array(fin_arr)


In [24]:
df = pd.DataFrame({'Payload': fin_arr[:, 0], 'Attack Type': fin_arr[:, 1]})
df.info
df.to_csv("/Users/gokul/Desktop/Attack_Identification/Logged_Output/bnns_attack_types.csv")