<a href="https://colab.research.google.com/github/banno-0720/Deep-Learning-Projects/blob/main/IntrusionDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
!pip install datasets
!pip install opendatasets

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [4]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import requests
from pathlib import Path
import opendatasets as od

# 1. Load Dataset

In [5]:
# Fetch the dataset from Kaggle. opendatasets prompts for a Kaggle username and
# API key, then saves the files under ./network-intrusion-dataset.
od.download("https://www.kaggle.com/datasets/chethuhn/network-intrusion-dataset")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: himanshugoyal2004
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/chethuhn/network-intrusion-dataset
Downloading network-intrusion-dataset.zip to ./network-intrusion-dataset


100%|██████████| 230M/230M [00:10<00:00, 23.1MB/s]





In [6]:
def load_data():
    """Download the Kaggle network-intrusion dataset and load the DDoS capture.

    Reads the Friday-afternoon DDoS CSV, normalises the column names
    (stripped, lower-cased, spaces replaced by underscores), keeps the six
    flow features used by the model plus ``label``, and drops rows that
    contain missing values. Infinite values are not handled here.

    Returns:
        pandas.DataFrame: a new frame holding only the selected columns.

    Raises:
        FileNotFoundError: if the expected CSV is not present after the download step.
        KeyError: if one of the selected columns is missing from the CSV.
    """
    # The download is skipped by opendatasets when the files are already there.
    od.download("https://www.kaggle.com/datasets/chethuhn/network-intrusion-dataset")

    # opendatasets writes into ./network-intrusion-dataset, so resolve the CSV
    # relative to the working directory instead of assuming Colab's /content.
    dataset_file = Path("network-intrusion-dataset") / "Friday-WorkingHours-Afternoon-DDos.pcap_ISCX.csv"
    if not dataset_file.is_file():
        raise FileNotFoundError(f"Expected dataset file not found: {dataset_file.resolve()}")

    # Load dataset into pandas DataFrame
    df = pd.read_csv(dataset_file)

    # Clean up column names (remove spaces and make lower case)
    df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_')

    # Inspect cleaned column names
    print("Cleaned Columns in the dataset:")
    print(df.columns)

    # Model inputs plus the target column.
    selected_columns = ['flow_duration', 'total_fwd_packets', 'total_backward_packets',
                        'fwd_packet_length_mean', 'bwd_packet_length_mean',
                        'flow_packets/s', 'label']

    # dropna() returns a new frame; nothing is mutated in place on a slice of
    # the full table.
    return df[selected_columns].dropna()

# # Test the function
# data = load_data()
# print(data.head())


# 2. Preprocess Data

In [7]:
def preprocess_data(data, sequence_length=10):
    """Turn the labelled flow table into fixed-length sequences for the LSTM.

    Steps: binarise ``label`` (``'DDoS'`` -> 1, anything else -> 0), drop rows
    containing NaN or +/-inf, min-max scale every non-label column, then slide
    a window of ``sequence_length`` consecutive rows over the result with a
    stride of one row. Each window takes the label of its last row.

    The input frame is not modified, so the function can safely be called
    again on the same frame (for instance when a notebook cell is re-run).

    NOTE(review): the scaler is fitted on every row passed in, i.e. before any
    train/validation split the caller performs afterwards.

    Args:
        data: DataFrame with a ``label`` column; all other columns are used
            as features.
        sequence_length: number of consecutive rows per window (default 10).

    Returns:
        tuple: ``(X_sequences, y_sequences)`` with shapes
        ``(n_windows, sequence_length, n_features)`` and ``(n_windows,)``,
        where ``n_windows = max(n_rows - sequence_length + 1, 0)``.

    Raises:
        ValueError: if ``sequence_length`` is smaller than 1.
    """
    if sequence_length < 1:
        raise ValueError("sequence_length must be at least 1")

    # Work on a copy: overwriting the caller's 'label' column would make a
    # second call see only 0/1 values and map every row to 0.
    cleaned = data.copy()
    cleaned['label'] = (cleaned['label'] == 'DDoS').astype(np.int64)

    # Treat infinite values as missing and drop every incomplete row
    cleaned = cleaned.replace([np.inf, -np.inf], np.nan).dropna()

    # Normalize numeric features
    scaler = MinMaxScaler()
    X = scaler.fit_transform(cleaned.drop(columns=['label']))
    y = cleaned['label'].values

    # The last full window starts at len(X) - sequence_length, hence the "+ 1";
    # clamp at zero when there are fewer rows than one window needs.
    n_windows = max(len(X) - sequence_length + 1, 0)

    # Row i of window_idx holds the row numbers i, i+1, ..., i+sequence_length-1.
    window_idx = np.arange(n_windows)[:, None] + np.arange(sequence_length)[None, :]
    X_sequences = X[window_idx]

    # Label of a window = label of its final row.
    y_sequences = y[sequence_length - 1: sequence_length - 1 + n_windows]

    return X_sequences, y_sequences


# 3. PyTorch Dataset Class

In [8]:
class DDoSDataset(Dataset):
    """Torch dataset pairing each input sequence with its target value.

    Both ``X`` and ``y`` are converted to ``float32`` tensors once, at
    construction time; indexing returns the matching ``(sequence, target)`` pair.
    """

    def __init__(self, X, y):
        # Convert up front so that per-item access is plain tensor indexing.
        as_float = torch.float32
        self.X = torch.tensor(X, dtype=as_float)
        self.y = torch.tensor(y, dtype=as_float)

    def __len__(self):
        # The number of targets defines the dataset size.
        n_samples = len(self.y)
        return n_samples

    def __getitem__(self, idx):
        sequence = self.X[idx]
        target = self.y[idx]
        return sequence, target

# 4. LSTM Model

In [9]:
class DDoSLSTM(nn.Module):
    """LSTM classifier: stacked LSTM -> linear layer -> sigmoid.

    Args:
        input_size: number of features per time step.
        hidden_size: width of each LSTM layer.
        num_layers: number of stacked LSTM layers.
        output_size: number of output units of the linear layer.
        dropout: dropout value handed to ``nn.LSTM`` (default 0.2).
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout=0.2):
        super().__init__()
        # batch_first=True: the LSTM takes input laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear layer applied to the last layer's final hidden state."""
        _, (hidden_states, _) = self.lstm(x)
        # hidden_states[-1] is the final hidden state of the last stacked layer.
        top_layer_state = hidden_states[-1]
        scores = self.fc(top_layer_state)
        return self.sigmoid(scores)

# 5. Training Function

In [10]:
def train_model(model, train_loader, val_loader, epochs, learning_rate):
    """Train ``model`` with Adam and binary cross-entropy, validating after every epoch.

    One summary line per epoch is printed (train loss, validation loss,
    validation accuracy at a 0.5 threshold).

    Args:
        model: ``nn.Module`` returning probabilities in [0, 1]; its output is
            expected to have a trailing dimension of size 1 (``(batch, 1)``).
        train_loader: iterable of ``(X_batch, y_batch)`` used for optimisation.
        val_loader: iterable of ``(X_batch, y_batch)`` used for evaluation only.
        epochs: number of passes over ``train_loader``.
        learning_rate: Adam learning rate.

    Returns:
        list[dict]: one dict per epoch with the keys ``train_loss``,
        ``val_loss`` and ``val_accuracy``.
    """
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Use the device the model already lives on rather than a notebook-level
    # `device` variable that may not exist (yet) when this function is called.
    device = next(model.parameters()).device

    history = []
    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            # squeeze(-1) removes only the trailing output dimension. A bare
            # squeeze() would also drop the batch dimension of a one-sample
            # batch and produce a 0-d tensor that no longer matches y_batch.
            outputs = model(X_batch).squeeze(-1)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        val_preds, val_targets = [], []
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                outputs = model(X_batch).squeeze(-1)
                loss = criterion(outputs, y_batch)
                val_loss += loss.item()
                # Probabilities above 0.5 count as the positive class.
                val_preds.extend((outputs > 0.5).cpu().numpy())
                val_targets.extend(y_batch.cpu().numpy())

        # Losses are averaged over the number of batches, not samples.
        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = accuracy_score(val_targets, val_preds)
        history.append({
            "train_loss": avg_train_loss,
            "val_loss": avg_val_loss,
            "val_accuracy": val_accuracy,
        })

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

    return history

# 6. Main Execution

In [11]:
# Seed torch so that weight initialisation and DataLoader shuffling repeat
# from run to run (GPU kernels may still introduce small differences).
SEED = 42
torch.manual_seed(SEED)

data = load_data()
X, y = preprocess_data(data)

# Split into train and validation sets
# NOTE(review): the sequences come from overlapping windows, so a shuffled
# split puts near-identical windows on both sides; validation accuracy is
# probably optimistic. Consider a chronological split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Create DataLoaders
train_dataset = DDoSDataset(X_train, y_train)
val_dataset = DDoSDataset(X_val, y_val)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define model
input_size = X.shape[2]  # Number of features
hidden_size = 64
num_layers = 2
output_size = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = DDoSLSTM(input_size, hidden_size, num_layers, output_size).to(device)

# Train model (per-epoch validation metrics are printed by train_model)
train_model(model, train_loader, val_loader, epochs=10, learning_rate=0.001)

# No separate final evaluation is run here; this only marks the end of training.
print("Training Complete!")


Skipping, found downloaded files in "./network-intrusion-dataset" (use force=True to force download)
Cleaned Columns in the dataset:
Index(['destination_port', 'flow_duration', 'total_fwd_packets',
       'total_backward_packets', 'total_length_of_fwd_packets',
       'total_length_of_bwd_packets', 'fwd_packet_length_max',
       'fwd_packet_length_min', 'fwd_packet_length_mean',
       'fwd_packet_length_std', 'bwd_packet_length_max',
       'bwd_packet_length_min', 'bwd_packet_length_mean',
       'bwd_packet_length_std', 'flow_bytes/s', 'flow_packets/s',
       'flow_iat_mean', 'flow_iat_std', 'flow_iat_max', 'flow_iat_min',
       'fwd_iat_total', 'fwd_iat_mean', 'fwd_iat_std', 'fwd_iat_max',
       'fwd_iat_min', 'bwd_iat_total', 'bwd_iat_mean', 'bwd_iat_std',
       'bwd_iat_max', 'bwd_iat_min', 'fwd_psh_flags', 'bwd_psh_flags',
       'fwd_urg_flags', 'bwd_urg_flags', 'fwd_header_length',
       'bwd_header_length', 'fwd_packets/s', 'bwd_packets/s',
       'min_packet_length', '