In [None]:
!pip install -q imbalanced-learn
print('Done!')

Done!


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import math
import warnings
warnings.filterwarnings('ignore')

# Pick the compute device once: CUDA when a GPU is visible to PyTorch, CPU otherwise.
has_gpu = torch.cuda.is_available()
device = torch.device('cuda' if has_gpu else 'cpu')

print('All libraries imported!')
print(f'Using device: {device}')

# Report which accelerator (if any) the runtime provides.
if not has_gpu:
    print('No GPU found - using CPU')
else:
    print(f'GPU: {torch.cuda.get_device_name(0)}')

All libraries imported!
Using device: cuda
GPU: Tesla T4


### Step 2: Uploading Dataset
upload: `Tuesday-WorkingHours.pcap_ISCX.csv`

In [None]:
from google.colab import files

# Ask the user for a CSV through Colab's upload widget and remember its name.
print('Click Choose Files to upload your CSV...')
uploaded = files.upload()

# If the dialog is dismissed without choosing a file, `uploaded` is empty and
# indexing its keys raised "IndexError: list index out of range". Fail with a
# message that says what to do instead.
if not uploaded:
    raise RuntimeError('No file was uploaded - re-run this cell and choose a CSV file.')

filename = next(iter(uploaded))  # first uploaded name, same as list(...)[0]
print(f'Uploaded: {filename}')

Click Choose Files to upload your CSV...


IndexError: list index out of range

Alternative: upload multiple CSV files at once and combine them.

In [None]:
import io

import pandas as pd
from google.colab import files

print('Upload ALL your CSV files one by one...')
print('(Mirai-greeth, Mirai-greip, Mirai-udpplain, Benign_Final)')

# The upload widget lets you select MULTIPLE files at once; the result is
# iterated below as (file name, raw bytes) pairs.
uploaded = files.upload()

# pd.concat([]) raises an unhelpful "No objects to concatenate"; stop early
# with a clear message when the dialog was dismissed.
if not uploaded:
    raise RuntimeError('No files were uploaded - re-run this cell and select at least one CSV.')

# Parse every uploaded file into its own dataframe.
dfs = []
for filename, content in uploaded.items():
    df_temp = pd.read_csv(io.BytesIO(content))
    print(f'Loaded: {filename} → {df_temp.shape}')
    dfs.append(df_temp)

# Combine everything into one big dataframe
df = pd.concat(dfs, ignore_index=True)

# Identify the label column dynamically (first column whose name contains
# "label", case-insensitive). str() keeps this safe for non-string headers.
label_col = next((col for col in df.columns if 'label' in str(col).lower()), None)

print(f'\n✅ All files combined!')
print(f'Total rows: {len(df):,}')
print(f'Total columns: {len(df.columns)}')
print(f'\nLabel counts:')
if label_col:
    print(df[label_col].value_counts())
else:
    print("Warning: No 'label' column found (case-insensitive).")
    print("Available columns:", df.columns.tolist())

Upload ALL your CSV files one by one...
(Mirai-greeth, Mirai-greip, Mirai-udpplain, Benign_Final)


Saving Mirai-greip_flood.pcap.csv to Mirai-greip_flood.pcap (1).csv
Saving Mirai-greip_flood1.pcap.csv to Mirai-greip_flood1.pcap (1).csv
Saving Mirai-greip_flood2.pcap.csv to Mirai-greip_flood2.pcap (1).csv
Saving Mirai-greip_flood3.pcap.csv to Mirai-greip_flood3.pcap (1).csv
Saving Mirai-greip_flood4.pcap.csv to Mirai-greip_flood4.pcap (1).csv
Saving Mirai-greip_flood5.pcap.csv to Mirai-greip_flood5.pcap (1).csv
Saving Mirai-greip_flood6.pcap.csv to Mirai-greip_flood6.pcap (1).csv
Saving Mirai-greip_flood7.pcap.csv to Mirai-greip_flood7.pcap (1).csv
Saving Mirai-greip_flood8.pcap.csv to Mirai-greip_flood8.pcap (1).csv
Saving Mirai-greip_flood9.pcap.csv to Mirai-greip_flood9.pcap (1).csv
Saving Mirai-greip_flood10.pcap.csv to Mirai-greip_flood10.pcap (1).csv
Saving Mirai-greip_flood11.pcap.csv to Mirai-greip_flood11.pcap (1).csv
Saving Mirai-greip_flood12.pcap (1).csv to Mirai-greip_flood12.pcap (1) (1).csv
Saving Mirai-greip_flood12.pcap.csv to Mirai-greip_flood12.pcap (2).csv
Saving

In [None]:
import os

from google.colab import files

# Save your trained model! This cell sits *before* the training cell, so on a
# fresh "Run All" the checkpoint does not exist yet — skip instead of crashing.
if os.path.exists('best_model.pth'):
    files.download('best_model.pth')
else:
    print('best_model.pth not found yet - run the training cell (Step 5) first.')

Testing


In [None]:
import pandas as pd
import io

# Print the name of every entry in the `uploaded` mapping from the upload cell.
print("Files uploaded:")
for filename in uploaded:
    print(f"  → {filename}")

In [None]:
import os

# Walk the top level of /content and report each entry with its size in MB.
print("ALL FILES IN COLAB RIGHT NOW:")
for f in os.listdir('/content'):
    size_in_bytes = os.path.getsize(f'/content/{f}')
    size = size_in_bytes / (1024*1024)
    print(f"  → {f}  ({size:.1f} MB)")

Colab


In [None]:
import glob

import pandas as pd

print("Loading all files...")

# Discover the CSVs already present in /content. sorted() makes the load order
# (and therefore the row order of `df`) deterministic; glob's order is not.
mirai_files = sorted(glob.glob('/content/Mirai-*.csv'))
benign_files = sorted(glob.glob('/content/BenignTraffic*.csv'))

print(f"Found {len(mirai_files)} Mirai files")
print(f"Found {len(benign_files)} Benign files")

# With no matches pd.concat([]) fails with "No objects to concatenate";
# explain the real problem instead.
if not mirai_files and not benign_files:
    raise FileNotFoundError(
        "No '/content/Mirai-*.csv' or '/content/BenignTraffic*.csv' files found - upload the dataset first."
    )

# Load every file and add (or overwrite) a `label` column:
# 1 = Botnet attack, 0 = Normal traffic.
mirai_dfs = [pd.read_csv(f).assign(label=1) for f in mirai_files]
benign_dfs = [pd.read_csv(f).assign(label=0) for f in benign_files]

# Combine everything
df = pd.concat(mirai_dfs + benign_dfs, ignore_index=True)

print(f"\n✅ DATASET READY!")
print(f"Total rows    : {len(df):,}")
print(f"Total columns : {len(df.columns)}")
print(f"\nClass distribution:")
print(f"  Botnet (1) : {(df['label']==1).sum():,}")
print(f"  Benign (0) : {(df['label']==0).sum():,}")

### Step 3: Load and Preprocess Data

In [None]:
import pandas as pd
import numpy as np
import glob
import torch
import torch.nn as nn
import torch.optim as optim
import math
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix, classification_report
warnings.filterwarnings('ignore')

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# One seed drives the row sample and the train/test split so that
# "Restart & Run All" reproduces the same dataset.
SEED = 42
rng  = np.random.default_rng(SEED)

# ── STEP 1: Load ALL files ──────────────────
print("\nLoading all files...")

# sorted() fixes the concatenation order; glob returns files in arbitrary order.
mirai_files  = sorted(glob.glob('/content/Mirai-*.csv'))
benign_files = sorted(glob.glob('/content/BenignTraffic*.csv'))
print(f'Mirai files  : {len(mirai_files)}')
print(f'Benign files : {len(benign_files)}')

# Both classes are required further down (class weighting, stratified split,
# ROC-AUC), so stop here with a clear message if either is missing.
if not mirai_files or not benign_files:
    raise FileNotFoundError(
        "Need at least one '/content/Mirai-*.csv' and one '/content/BenignTraffic*.csv' file - upload the dataset first."
    )

# Mirai → label 1 (attack), Benign → label 0 (normal).
# assign() adds the column, or overwrites an existing `label` column.
mirai_dfs  = [pd.read_csv(f).assign(label=1) for f in mirai_files]
benign_dfs = [pd.read_csv(f).assign(label=0) for f in benign_files]

# Combine all into one dataframe
df = pd.concat(mirai_dfs + benign_dfs, ignore_index=True)
print(f'\nTotal rows    : {len(df):,}')
print(f'Total columns : {len(df.columns)}')

# ── STEP 2: Clean Data ──────────────────────
# Treat +/-inf like missing values, then zero-fill everything missing.
print("\nCleaning data...")
df = df.replace([np.inf, -np.inf], np.nan).fillna(0)

# ── STEP 3: Features and Labels ─────────────
# Only numeric columns are used as model inputs.
y_binary  = df['label']
X_numeric = df.drop(['label'], axis=1).select_dtypes(include=[np.number])

print(f'\nFeatures : {X_numeric.shape[1]}')
print(f'Benign   : {(y_binary==0).sum():,} ({(y_binary==0).mean()*100:.1f}%)')
print(f'Botnet   : {(y_binary==1).sum():,} ({(y_binary==1).mean()*100:.1f}%)')

# ── STEP 4: Plot Class Distribution ─────────
# sort_index() puts class 0 first so the colours always mean the same class
# (value_counts() alone orders bars by frequency).
plt.figure(figsize=(8, 4))
y_binary.value_counts().sort_index().plot(kind='bar', color=['steelblue','tomato'])
plt.title('Class Distribution')
plt.xlabel('0 = Benign, 1 = Botnet')
plt.ylabel('Count')
plt.xticks(rotation=0)
plt.tight_layout()
plt.show()

# ── STEP 5: Sample ───────────────────────────
SAMPLE_SIZE     = 100000
SEQUENCE_LENGTH = 10

# Never request more rows than exist (choice(..., replace=False) would raise).
n_sample = min(SAMPLE_SIZE, len(X_numeric))
if n_sample < SEQUENCE_LENGTH:
    raise ValueError(
        f'Only {n_sample} rows available; need at least SEQUENCE_LENGTH={SEQUENCE_LENGTH} to build one sequence.'
    )

print(f'\nSampling {n_sample:,} rows...')
# Sorting the sampled positions keeps rows in their original order, so each
# window below holds rows in stream order rather than a random shuffle.
idx       = np.sort(rng.choice(len(X_numeric), n_sample, replace=False))
X_sampled = X_numeric.iloc[idx].reset_index(drop=True)
y_sampled = y_binary.iloc[idx].reset_index(drop=True)

# ── STEP 6: Scale ────────────────────────────
# NOTE(review): the scaler is fitted on all sampled rows, test rows included.
# Together with the overlapping windows below this leaks information between
# train and test — consider splitting by row range before scaling/windowing.
scaler   = StandardScaler()
X_scaled = scaler.fit_transform(X_sampled)

# ── STEP 7: Create Sequences ─────────────────
# Stride-1 sliding windows of SEQUENCE_LENGTH consecutive rows; each window is
# labelled with the label of its last row.
print(f'Creating sequences of {SEQUENCE_LENGTH} packets...')
windows = np.lib.stride_tricks.sliding_window_view(X_scaled, SEQUENCE_LENGTH, axis=0)
# sliding_window_view appends the window axis last: (n, features, seq) → (n, seq, features)
X_seq = np.ascontiguousarray(windows.transpose(0, 2, 1), dtype=np.float32)
y_seq = y_sampled.values[SEQUENCE_LENGTH - 1:].astype(np.float32)

# ── STEP 8: Train/Test Split ─────────────────
X_train, X_test, y_train, y_test = train_test_split(
    X_seq, y_seq, test_size=0.2, random_state=SEED, stratify=y_seq
)

print(f'\nTrain sequences : {len(X_train):,}')
print(f'Test  sequences : {len(X_test):,}')
print(f'Sequence shape  : {X_train.shape[1:]}')
print('\n✅ Data ready! Next step: Build the Transformer model')

### Step 4: Build Transformer Model

In [None]:
# ── IMPROVED MODEL ───────────────────────────
class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to a batch-first sequence.

    A table of shape (1, max_len, d_model) is precomputed: even feature
    columns hold sin(position * freq), odd columns hold cos(position * freq).
    It is stored as a buffer, so it follows the module across devices but is
    not a trainable parameter.

    Args:
        d_model: size of the last (feature) dimension of the inputs.
        max_len: number of positions precomputed in the table.
    """

    def __init__(self, d_model, max_len=5000):
        super().__init__()
        pe       = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len).unsqueeze(1).float()
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one cosine column fewer than sine columns;
        # trimming div_term avoids the shape mismatch that used to crash here.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Return `x` plus the encoding for its first `x.size(1)` positions.

        `x` is indexed as (batch, seq_len, d_model); the table broadcasts over
        the batch dimension.
        """
        return x + self.pe[:, :x.size(1)]


class BotnetTransformer(nn.Module):
    """Transformer-encoder binary classifier over sequences of feature vectors.

    Pipeline: per-step projection to `d_model` → sinusoidal positional
    encoding → pre-norm Transformer encoder → mean over the sequence axis →
    MLP head that emits one raw logit per sequence.

    Args:
        input_dim: number of features per time step.
        d_model: encoder width.
        nhead: number of attention heads.
        num_layers: number of stacked encoder layers.
        dropout: dropout rate (the last head layer uses half of it).
    """

    def __init__(self, input_dim, d_model=256, nhead=8, num_layers=3, dropout=0.2):
        super().__init__()

        # Input projection: Linear + LayerNorm + ReLU + Dropout
        self.input_projection = nn.Sequential(
            nn.Linear(input_dim, d_model),
            nn.LayerNorm(d_model),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

        self.pos_encoder = PositionalEncoding(d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead,
            dim_feedforward=d_model*4,
            dropout=dropout, batch_first=True,
            norm_first=True              # More stable training
        )
        self.transformer = nn.TransformerEncoder(
            encoder_layer, num_layers=num_layers,
            norm=nn.LayerNorm(d_model)
        )

        # Classifier head: d_model → 128 → 64 → 1
        self.classifier = nn.Sequential(
            nn.Linear(d_model, 128),
            nn.LayerNorm(128),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(dropout/2),
            nn.Linear(64, 1)
            # NO Sigmoid here — BCEWithLogitsLoss handles it
        )

    def forward(self, x):
        """Return raw logits of shape (batch,) for `x` of shape (batch, seq, input_dim)."""
        x = self.input_projection(x)
        x = self.pos_encoder(x)
        x = self.transformer(x)
        x = x.mean(dim=1)              # Mean pooling over the sequence axis
        # squeeze(-1) drops only the trailing size-1 logit axis. A bare
        # squeeze() also removed the batch axis when batch == 1, returning a
        # 0-d tensor that no longer matches a (1,)-shaped target in the loss.
        return self.classifier(x).squeeze(-1)


# The feature axis (index 2) of the training windows fixes the model's input width.
input_dim = X_train.shape[2]

# Build the network, then move it to the selected device.
model = BotnetTransformer(input_dim=input_dim)
model = model.to(device)

# Total element count across all parameter tensors.
total_params = sum(map(torch.numel, model.parameters()))

print(f'✅ Model ready!')
print(f'Parameters : {total_params:,}')
print(f'Device     : {device}')
print(f'')
print(f'Improvements over your original:')
print(f'  OLD: d_model=128, heads=4, layers=2, last-token pooling')
print(f'  NEW: d_model=256, heads=8, layers=3, mean pooling')

### Step 5: Train the Model (Takes 5-15 mins)

In [None]:
# ── TRAINING SETUP ───────────────────────────
EPOCHS      = 30
BATCH_SIZE  = 128    # was 64 — bigger = faster on GPU
LEARNING_RATE = 0.001

# Handle class imbalance: weight positive samples by the negative/positive ratio.
neg_count  = int((y_train == 0).sum())
pos_count  = int((y_train == 1).sum())
if neg_count == 0 or pos_count == 0:
    # A missing class would give pos_weight = inf (or 0) and a meaningless loss.
    raise ValueError(
        f'Training data must contain both classes (benign={neg_count}, botnet={pos_count}).'
    )
# Pin dtype to float32 so the weight matches the model's logits; the plain
# ratio of NumPy counts is a float64 scalar.
pos_weight = torch.tensor([neg_count / pos_count], dtype=torch.float32, device=device)
print(f'Class weight : {pos_weight.item():.2f}x')

# Prepare tensors (float32 copies placed on the training device)
X_train_t = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_t = torch.tensor(y_train, dtype=torch.float32, device=device)
X_test_t  = torch.tensor(X_test, dtype=torch.float32, device=device)

train_loader = DataLoader(
    TensorDataset(X_train_t, y_train_t),
    batch_size=BATCH_SIZE, shuffle=True
)

# The model emits raw logits (no Sigmoid), so use BCEWithLogitsLoss;
# pos_weight compensates for the class imbalance.
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-5)

# ReduceLROnPlateau halves the LR when the monitored score (mode='max')
# has not improved for `patience` epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', patience=3, factor=0.5
)

# ── EARLY STOPPING ── OLD CODE MISSING THIS!
class EarlyStopping:
    """Signals when a higher-is-better score has stopped improving.

    Call the instance once per epoch with the monitored score. A score counts
    as an improvement only if it exceeds the best score so far by more than
    `min_delta`; after `patience` consecutive non-improving calls,
    `should_stop` becomes True.

    Args:
        patience: non-improving calls tolerated before stopping.
        min_delta: minimum increase over the best score that counts as an
            improvement (default 0.001, the previously hard-coded value).
    """

    def __init__(self, patience=7, min_delta=0.001):
        self.patience    = patience
        self.min_delta   = min_delta
        self.counter     = 0      # consecutive calls without improvement
        self.best_score  = None   # None until the first score is seen
        self.should_stop = False

    def __call__(self, score):
        if self.best_score is None or score > self.best_score + self.min_delta:
            self.best_score = score
            self.counter    = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True

early_stopping = EarlyStopping(patience=7)

# ── TRAIN ────────────────────────────────────
train_losses = []
test_f1s     = []
# Start below any reachable F1 so the first epoch always writes a checkpoint.
# With 0 as the start, a run whose F1 stayed at 0 never saved
# 'best_model.pth' and the results cell then failed on load.
best_f1      = float('-inf')
EVAL_BATCH_SIZE = 1024   # evaluate in chunks instead of one giant forward pass

# NOTE(review): checkpoint selection, LR scheduling and early stopping are all
# driven by F1 on the *test* set, so the final test metrics are optimistic.
# Consider holding out a validation split for these decisions.

print('Training started...')
print('-' * 65)

for epoch in range(EPOCHS):
    # Training
    model.train()
    epoch_loss = 0.0
    for batch_X, batch_y in train_loader:
        optimizer.zero_grad()
        # reshape(-1) keeps the logits 1-D even for a final batch of size 1,
        # so they always match batch_y's shape.
        loss = criterion(model(batch_X).reshape(-1), batch_y)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(train_loader)
    train_losses.append(avg_loss)

    # Evaluation
    model.eval()
    with torch.no_grad():
        logits = torch.cat([
            model(chunk).reshape(-1)
            for chunk in torch.split(X_test_t, EVAL_BATCH_SIZE)
        ])
        probs = torch.sigmoid(logits).cpu().numpy()
    preds = (probs > 0.5).astype(int)

    acc = accuracy_score(y_test, preds)
    f1  = f1_score(y_test, preds, zero_division=0)
    test_f1s.append(f1)

    # Keep the weights of the best-scoring epoch on disk.
    tag = ''
    if f1 > best_f1:
        best_f1 = f1
        torch.save(model.state_dict(), 'best_model.pth')
        tag = ' ← BEST'

    scheduler.step(f1)
    lr = optimizer.param_groups[0]['lr']
    print(f'Epoch {epoch+1:2d}/{EPOCHS} | Loss: {avg_loss:.4f} | Acc: {acc:.4f} | F1: {f1:.4f} | LR: {lr:.6f}{tag}')

    early_stopping(f1)
    if early_stopping.should_stop:
        print(f'\nEarly stopping at epoch {epoch+1}')
        break

print('-' * 65)
print(f'✅ Training complete! Best F1: {best_f1:.4f}')

### Step 6: Results and Visualization

In [None]:
# ── FINAL EVALUATION & CHARTS ────────────────
from sklearn.metrics import roc_curve, precision_recall_curve, average_precision_score, precision_recall_fscore_support

# Load best model. map_location lets a checkpoint saved on GPU load on a
# CPU-only runtime (e.g. after the Colab session changed hardware).
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model.eval()

# Score the test set in chunks; reshape(-1) keeps every chunk's logits 1-D so
# they can be concatenated even when a chunk holds a single sequence.
with torch.no_grad():
    logits = torch.cat([
        model(chunk).reshape(-1)
        for chunk in torch.split(X_test_t, 1024)
    ])
    probs = torch.sigmoid(logits).cpu().numpy()
preds = (probs > 0.5).astype(int)

# Metrics (zero_division=0 matches the F1 computation used during training)
accuracy           = accuracy_score(y_test, preds)
precision, recall, f1, _ = precision_recall_fscore_support(
    y_test, preds, average='binary', zero_division=0
)
auc                = roc_auc_score(y_test, probs)

print('=' * 50)
print('       YOUR FINAL RESULTS')
print('=' * 50)
print(f'  Accuracy  : {accuracy:.4f} ({accuracy*100:.2f}%)')
print(f'  Precision : {precision:.4f}')
print(f'  Recall    : {recall:.4f}')
print(f'  F1-Score  : {f1:.4f}')
print(f'  ROC-AUC   : {auc:.4f}')
print('=' * 50)
print(classification_report(y_test, preds, target_names=['Benign','Botnet']))

# ── CHART 1: Training Curves ─────────────────
# Two panels side by side: training loss (left) and per-epoch F1 (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
fig.suptitle('Botnet Detection - Transformer Results', fontsize=14, fontweight='bold')

loss_ax, f1_ax = axes
loss_ax.plot(range(1, len(train_losses)+1), train_losses, 'b-o', linewidth=2)
f1_ax.plot(range(1, len(test_f1s)+1), test_f1s, 'g-o', linewidth=2)

# Shared cosmetics for both panels.
for panel, panel_title, panel_ylabel in (
    (loss_ax, 'Training Loss', 'Loss'),
    (f1_ax, 'F1 Score Per Epoch', 'F1 Score'),
):
    panel.set_title(panel_title)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(panel_ylabel)
    panel.grid(True, alpha=0.3)
f1_ax.set_ylim([0, 1.05])

plt.tight_layout()
plt.savefig('training_curves.png', dpi=150)
plt.show()

# ── CHART 2: Confusion Matrix ─────────────────
# Heatmap of true vs. predicted labels from the final evaluation.
class_names = ['Benign','Botnet']
plt.figure(figsize=(7, 5))
cm = confusion_matrix(y_test, preds)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix', fontsize=13, fontweight='bold')
plt.ylabel('True Label')
plt.xlabel('Predicted Label')
plt.tight_layout()
plt.savefig('confusion_matrix.png', dpi=150)
plt.show()

# ── CHART 3: ROC Curve ───────────────────────
# Model curve plus the diagonal of a random classifier for reference.
plt.figure(figsize=(7, 5))
fpr, tpr, _ = roc_curve(y_test, probs)
roc_label = f'Transformer (AUC={auc:.4f})'
plt.plot(fpr, tpr, 'b-', linewidth=2, label=roc_label)
plt.plot([0,1],[0,1],'k--', alpha=0.5, label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve', fontsize=13, fontweight='bold')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('roc_curve.png', dpi=150)
plt.show()

# ── CHART 4: Metrics Bar Chart ────────────────
# One bar per headline metric, each annotated with its value.
colors = ['#2196F3','#4CAF50','#FF9800','#E91E63','#9C27B0']
metrics = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
    'ROC-AUC': auc,
}
plt.figure(figsize=(10, 5))
bars = plt.bar(metrics.keys(), metrics.values(), color=colors, edgecolor='black')
for bar, val in zip(bars, metrics.values()):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.01
    plt.text(label_x, label_y, f'{val:.4f}', ha='center', fontweight='bold', fontsize=11)
plt.ylim(0, 1.15)
plt.title('Model Performance Metrics', fontsize=14, fontweight='bold')
plt.ylabel('Score')
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig('metrics.png', dpi=150)
plt.show()

# ── DOWNLOAD ALL FILES ────────────────────────
from google.colab import files

# Trigger one browser download per artifact, in this order.
for artifact_name in (
    'best_model.pth',
    'training_curves.png',
    'confusion_matrix.png',
    'roc_curve.png',
    'metrics.png',
):
    files.download(artifact_name)
print('✅ All files downloaded!')

In [None]:
# Single three-panel summary figure: loss curve, F1 curve, confusion matrix.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
fig.suptitle('Botnet Detection - Transformer Results', fontsize=16, fontweight='bold')
loss_ax, f1_ax, cm_ax = axes

# Panel 1: training loss per epoch.
loss_ax.plot(range(1, len(train_losses)+1), train_losses, 'b-o', linewidth=2)
loss_ax.set_title('Training Loss')
loss_ax.set_ylabel('Loss')

# Panel 2: test F1 per epoch, y-axis fixed to [0, 1].
f1_ax.plot(range(1, len(test_f1s)+1), test_f1s, 'g-o', linewidth=2)
f1_ax.set_title('Test F1 Score')
f1_ax.set_ylabel('F1 Score')
f1_ax.set_ylim([0, 1])

# Both curve panels share the x label and a light grid.
for curve_ax in (loss_ax, f1_ax):
    curve_ax.set_xlabel('Epoch')
    curve_ax.grid(True, alpha=0.3)

# Panel 3: confusion matrix of the current predictions.
class_names = ['Benign','Botnet']
cm = confusion_matrix(y_test, preds)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=cm_ax,
)
cm_ax.set_title('Confusion Matrix')
cm_ax.set_ylabel('True Label')
cm_ax.set_xlabel('Predicted Label')

plt.tight_layout()
plt.savefig('results.png', dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# Bar chart of the headline metrics, each bar annotated with its value.
colors = ['#2196F3','#4CAF50','#FF9800','#E91E63','#9C27B0']
metrics = {
    'Accuracy': accuracy,
    'Precision': precision,
    'Recall': recall,
    'F1-Score': f1,
    'ROC-AUC': auc,
}

plt.figure(figsize=(10, 5))
bars = plt.bar(metrics.keys(), metrics.values(), color=colors)

# Write each value just above the top of its bar, centred horizontally.
for bar, val in zip(bars, metrics.values()):
    label_x = bar.get_x() + bar.get_width()/2
    label_y = bar.get_height() + 0.01
    plt.text(label_x, label_y, f'{val:.4f}', ha='center', fontweight='bold', fontsize=12)

plt.ylim(0, 1.15)
plt.title('Model Performance Metrics', fontsize=15, fontweight='bold')
plt.ylabel('Score')
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig('metrics.png', dpi=150, bbox_inches='tight')
plt.show()
print('All charts saved!')

### Step 7: Download Your Results

In [None]:
from google.colab import files

# Download the summary figure, the metrics chart and the checkpoint, in that order.
for result_file in ('results.png', 'metrics.png', 'best_model.pth'):
    files.download(result_file)
print('Downloaded: results.png, metrics.png, best_model.pth')

## Congratulations! Project Complete!

**For your report include:**
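- Dataset: Mirai botnet traffic (`Mirai-*.csv`, label 1) and benign traffic (`BenignTraffic*.csv`, label 0), as loaded in Step 3
- Model: Transformer Encoder with Multi-Head Self-Attention (d_model=256, 8 heads, 3 layers, mean pooling over each 10-row sequence)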
- Features: 77 network flow features
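- Training: up to 30 epochs with early stopping (patience 7), Adam optimizer (lr 0.001, weight decay 1e-5), class-weighted Binary Cross Entropy on logits (`BCEWithLogitsLoss` with `pos_weight`), `ReduceLROnPlateau` learning-rate schedule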
- Results: Your accuracy, F1-score, confusion matrix charts
