In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
df = pd.read_csv('iot23_combined_2.csv')

In [3]:
X = df[['duration', 'orig_bytes', 'resp_bytes', 'missed_bytes', 'orig_pkts', 'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes', 'proto_icmp', 'proto_tcp', 'proto_udp', 'conn_state_OTH', 'conn_state_REJ', 'conn_state_RSTO', 'conn_state_RSTOS0', 'conn_state_RSTR', 'conn_state_RSTRH', 'conn_state_S0', 'conn_state_S1', 'conn_state_S2', 'conn_state_S3', 'conn_state_SF', 'conn_state_SH', 'conn_state_SHR']]
Y = pd.get_dummies(df['label']).values

In [4]:
scaler = MinMaxScaler()
scaler.fit(X)
normalized_x = scaler.transform(X)
scaler.fit(Y)
normalized_y = scaler.transform(Y)
X_train, X_test, y_train, y_test = train_test_split(normalized_x, normalized_y, random_state=10, test_size=0.2)

In [5]:
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).float()
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).float()

In [6]:
# Define the base models
class Model1(nn.Module):
    def __init__(self, input_shape, num_classes):
        super(Model1, self).__init__()
        self.fc1 = nn.Linear(input_shape, 64)
        self.fc2 = nn.Linear(64, num_classes)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

class Model2(nn.Module):
    def __init__(self, input_shape, num_classes):
        super(Model2, self).__init__()
        self.fc1 = nn.Linear(input_shape, 128)
        self.fc2 = nn.Linear(128, num_classes)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

class Model3(nn.Module):
    def __init__(self, input_shape, num_classes):
        super(Model3, self).__init__()
        self.fc1 = nn.Linear(input_shape, 32)
        self.fc2 = nn.Linear(32, num_classes)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x

# Define the ensemble model
class EnsembleModel(nn.Module):
    def __init__(self, input_shape, num_classes, models):
        super(EnsembleModel, self).__init__()
        self.models = nn.ModuleList(models)
        self.fc1 = nn.Linear(len(models) * num_classes, num_classes)
        
    def forward(self, x):
        outputs = []
        for model in self.models:
            outputs.append(model(x))
        x = torch.cat(outputs, dim=1)
        x = self.fc1(x)
        x = torch.softmax(x, dim=1)
        return x

# Initialize the base models
models = [Model1(24, 4), Model2(24, 4), Model3(24, 4)]

# Initialize the ensemble model
ensemble_model = EnsembleModel(24, 4, models)

# Initialize the optimizer
optimizer = optim.Adam(ensemble_model.parameters(), lr=0.001)

# Initialize the dataset and dataloader
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Train the models
num_epochs = 100
for epoch in range(num_epochs):
    for x_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = ensemble_model(x_batch)
        # print(y_pred)
        loss = nn.BCEWithLogitsLoss()(y_pred, y_batch)
        loss.backward()
        optimizer.step()

# Make predictions on the test set
y_pred = ensemble_model(X_test)
y_pred = torch.argmax(y_pred, dim=1)

# Compute accuracy
_, normal_y_test = torch.max(y_test.data,1)
accuracy = (y_pred == normal_y_test).float().mean()
print("Accuracy on test set:", accuracy.item())


Accuracy on test set: 0.5119125247001648


In [7]:
torch.save(ensemble_model,'stacking.pt')