In [1]:
import os
os.chdir(r"c:\Users\ASUS\OneDrive\Desktop\BTP PROJECT\BTP_Implementation\iiot-hybrid-detection")
print("CWD:", os.getcwd())

CWD: c:\Users\ASUS\OneDrive\Desktop\BTP PROJECT\BTP_Implementation\iiot-hybrid-detection


In [2]:
pip install torch

Note: you may need to restart the kernel to use updated packages.


In [1]:
pip install tqdm


Note: you may need to restart the kernel to use updated packages.


In [4]:
import os, joblib, json
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support, roc_curve, auc

from src.train_ae import train_autoencoder
from src.constants import DATA_PATH, TARGET_COL, SEED


In [3]:
# Train autoencoder on Normal traffic only
ae_model, preproc = train_autoencoder()

# Save trained model + preprocessing
os.makedirs("results/models", exist_ok=True)
joblib.dump(ae_model, "results/models/autoencoder.pkl")
joblib.dump(preproc, "results/models/ae_preprocessor.pkl")
print("✅ Autoencoder & preprocessor saved.")


Epoch 1/20, Loss: 0.0092
Epoch 2/20, Loss: 0.0047
Epoch 3/20, Loss: 0.0037
Epoch 4/20, Loss: 0.0036
Epoch 5/20, Loss: 0.0030
Epoch 6/20, Loss: 0.0032
Epoch 7/20, Loss: 0.0037
Epoch 8/20, Loss: 0.0033
Epoch 9/20, Loss: 0.0025
Epoch 10/20, Loss: 0.0023
Epoch 11/20, Loss: 0.0021
Epoch 12/20, Loss: 0.0023
Epoch 13/20, Loss: 0.0023
Epoch 14/20, Loss: 0.0014
Epoch 15/20, Loss: 0.0018
Epoch 16/20, Loss: 0.0023
Epoch 17/20, Loss: 0.0020
Epoch 18/20, Loss: 0.0013
Epoch 19/20, Loss: 0.0027
Epoch 20/20, Loss: 0.0021
✅ Autoencoder & preprocessor saved.


In [5]:
# Load dataset
df = pd.read_csv(DATA_PATH)

# Preprocess
X_all = preproc.transform(df.drop(columns=[TARGET_COL]))
y_true = (df[TARGET_COL] != "normal").astype(int)   # anomaly = 1, normal = 0

# Reconstruction
recon = ae_model(torch.tensor(X_all).float()).detach().numpy()
errors = ((X_all - recon) ** 2).mean(axis=1)

# Threshold (mean + 3*std)
threshold = errors.mean() + 3*errors.std()
y_pred = (errors > threshold).astype(int)

# Metrics
roc = roc_auc_score(y_true, errors)
prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")
print(f"ROC-AUC={roc:.3f} | Precision={prec:.3f} | Recall={rec:.3f} | F1={f1:.3f}")


NameError: name 'preproc' is not defined

In [None]:
os.makedirs("results/reports", exist_ok=True)
report = {
    "roc_auc": float(roc),
    "precision": float(prec),
    "recall": float(rec),
    "f1": float(f1),
    "threshold": float(threshold)
}
with open("results/reports/autoencoder_report.json", "w") as f:
    json.dump(report, f, indent=4)

print("✅ Report saved to results/reports/autoencoder_report.json")


In [None]:
os.makedirs("results/plots", exist_ok=True)

# Error histogram
plt.hist(errors[y_true==0], bins=50, alpha=0.5, label="Normal")
plt.hist(errors[y_true==1], bins=50, alpha=0.5, label="Attack")
plt.legend(); plt.title("Reconstruction Errors")
plt.xlabel("Error"); plt.ylabel("Count")
plt.savefig("results/plots/ae_error_hist.png")
plt.show()

# ROC Curve
fpr, tpr, _ = roc_curve(y_true, errors)
plt.plot(fpr, tpr, label=f"AUC={auc(fpr,tpr):.3f}")
plt.plot([0,1],[0,1],"--")
plt.xlabel("FPR"); plt.ylabel("TPR")
plt.title("Autoencoder ROC Curve")
plt.legend()
plt.savefig("results/plots/ae_roc.png")
plt.show()


In [3]:
from src.plot_utils import plot_training_curves

# Example dummy data to test plotting
history = {
    "train_acc": [0.81, 0.85, 0.89, 0.91, 0.93, 0.94, 0.945],
    "val_acc":   [0.80, 0.83, 0.87, 0.90, 0.91, 0.935, 0.940],
    "train_loss": [0.48, 0.35, 0.28, 0.23, 0.19, 0.16, 0.14],
    "val_loss":   [0.50, 0.38, 0.32, 0.27, 0.23, 0.20, 0.18]
}

plot_training_curves(history, model_name="test_model")


✅ Plots saved in results/plots/ as test_model_accuracy.png and test_model_loss.png


In [6]:
# notebook cell
from src.train_ae import train_autoencoder
import torch, os

# choose device (optional override)
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# train for 50 epochs
ae_model, preproc, history = train_autoencoder(
    epochs=50,
    batch_size=256,
    lr=1e-3,
    device=device,
    checkpoint_dir="results/models",
    checkpoint_name="autoencoder_checkpoint.pth",
    save_best_as="results/models/autoencoder_state.pt",
    resume_from=None,  # or "results/models/autoencoder_checkpoint.pth" to resume
    early_stopping_patience=None  # or e.g. 8
)

print("Training complete. Best model saved to results/models/autoencoder_state.pt")


Using device: cpu
Epoch [1/50]  train_loss=0.011365  val_loss=0.015208
[saved best] epoch 1 val_loss=0.015208
Epoch [2/50]  train_loss=0.004333  val_loss=0.013682
[saved best] epoch 2 val_loss=0.013682
Epoch [3/50]  train_loss=0.003685  val_loss=0.016441
Epoch [4/50]  train_loss=0.003533  val_loss=0.010751
[saved best] epoch 4 val_loss=0.010751
Epoch [5/50]  train_loss=0.003077  val_loss=0.009860
[saved best] epoch 5 val_loss=0.009860
Epoch [6/50]  train_loss=0.001848  val_loss=0.010985
Epoch [7/50]  train_loss=0.002748  val_loss=0.020543
Epoch [8/50]  train_loss=0.003247  val_loss=0.012216
Epoch [9/50]  train_loss=0.003019  val_loss=0.010934
Epoch [10/50]  train_loss=0.002486  val_loss=0.009435
[saved best] epoch 10 val_loss=0.009435
Epoch [11/50]  train_loss=0.001906  val_loss=0.008000
[saved best] epoch 11 val_loss=0.008000
Epoch [12/50]  train_loss=0.001558  val_loss=0.009723
Epoch [13/50]  train_loss=0.001907  val_loss=0.015032
Epoch [14/50]  train_loss=0.003590  val_loss=0.010856

In [7]:
from src.plot_utils import plot_training_curves

# history is dict with lists: train_acc, val_acc, train_loss, val_loss
plot_training_curves(history, model_name="autoencoder_50epochs")


✅ Plots saved in results/plots/ as autoencoder_50epochs_accuracy.png and autoencoder_50epochs_loss.png


In [8]:
import numpy as np, pandas as pd, torch, joblib, os
from sklearn.metrics import roc_auc_score, roc_curve, precision_recall_curve
from src.constants import DATA_PATH, TARGET_COL

# load df and preproc + model
df = pd.read_csv(DATA_PATH)
df[TARGET_COL] = df[TARGET_COL].astype(str).str.lower()

preproc = joblib.load("results/models/ae_preprocessor.joblib")
model = torch.load("results/models/autoencoder_state.pt", map_location="cpu")
# if the model object class is defined in src.train_ae.Autoencoder, instantiate and load:
from src.train_ae import Autoencoder
input_dim = preproc.transform(df.drop(columns=[TARGET_COL]).iloc[:1]).shape[1]
ae = Autoencoder(input_dim=input_dim)
ae.load_state_dict(torch.load("results/models/autoencoder_state.pt", map_location="cpu"))
ae.eval()

# transform everything
X = preproc.transform(df.drop(columns=[TARGET_COL]))
with torch.no_grad():
    recon = ae(torch.tensor(X).float()).numpy()
errors = ((X - recon) ** 2).mean(axis=1)

# true labels: 0 normal, 1 attack
y_true = (~(df[TARGET_COL].str.lower() == "normal")).astype(int).values
auc = roc_auc_score(y_true, errors)
print("AE ROC-AUC:", auc)

# save errors and results
np.save("results/reports/ae_errors.npy", errors)
pd.DataFrame({"true":y_true, "error":errors}).to_csv("results/reports/ae_errors_table.csv", index=False)

# save ROC plot and histogram (use earlier eval code or write a quick plot)
import matplotlib.pyplot as plt
fpr, tpr, _ = roc_curve(y_true, errors)
plt.figure(); plt.plot(fpr, tpr); plt.plot([0,1],[0,1],'--'); plt.title(f"AE ROC (AUC={auc:.3f})"); plt.savefig("results/reports/ae_roc.png", dpi=300); plt.close()

plt.figure(figsize=(8,5))
plt.hist(errors[y_true==0], bins=80, alpha=0.6, label="normal")
plt.hist(errors[y_true==1], bins=80, alpha=0.6, label="attack")
plt.legend(); plt.title("AE Reconstruction Error"); plt.savefig("results/reports/ae_error_hist.png", dpi=300); plt.close()


MemoryError: Unable to allocate 2.05 GiB for an array with shape (211043, 1304) and data type float64

In [12]:
import numpy as np
import pandas as pd
import torch
import joblib
import os
from sklearn.metrics import roc_auc_score, roc_curve
from src.constants import DATA_PATH, TARGET_COL
from src.train_ae import Autoencoder
import matplotlib.pyplot as plt

# ======================================
# 1. Load data
# ======================================
print("Loading dataset...")
df = pd.read_csv(DATA_PATH)
df[TARGET_COL] = df[TARGET_COL].astype(str).str.lower()

# Load preprocessor and model
preproc = joblib.load("results/models/ae_preprocessor.joblib")

input_dim = preproc.transform(df.drop(columns=[TARGET_COL]).iloc[:1]).shape[1]
ae = Autoencoder(input_dim=input_dim)
ae.load_state_dict(torch.load("results/models/autoencoder_state.pt", map_location="cpu"))
ae.eval()

# Ensure results folder exists
os.makedirs("results/reports", exist_ok=True)

# ======================================
# 2. Batch processing
# ======================================
BATCH_SIZE = 5000  # Adjust based on memory
n_samples = df.shape[0]
errors = []

print(f"Processing {n_samples} samples in batches of {BATCH_SIZE}...")

# Loop through batches
for start in range(0, n_samples, BATCH_SIZE):
    end = min(start + BATCH_SIZE, n_samples)
    
    # --- Use DataFrame slice instead of NumPy ---
    batch_df = df.iloc[start:end].drop(columns=[TARGET_COL])
    
    # Transform using the preprocessor
    X_batch = preproc.transform(batch_df)
    
    # Convert to tensor
    X_tensor = torch.tensor(X_batch, dtype=torch.float32)
    
    # Autoencoder reconstruction
    with torch.no_grad():
        recon = ae(X_tensor).numpy()
    
    # Compute reconstruction error for batch
    batch_errors = np.mean((X_batch - recon) ** 2, axis=1)
    errors.extend(batch_errors)

    print(f"Processed {end}/{n_samples} samples...")

errors = np.array(errors)

# ======================================
# 3. Evaluate
# ======================================
# true labels: 0 = normal, 1 = attack
y_true = (~(df[TARGET_COL].str.lower() == "normal")).astype(int).values
auc = roc_auc_score(y_true, errors)
print("✅ AE ROC-AUC:", auc)

# ======================================
# 4. Save results
# ======================================
np.save("results/reports/ae_errors.npy", errors)
pd.DataFrame({"true": y_true, "error": errors}).to_csv("results/reports/ae_errors_table.csv", index=False)
print("Saved AE error table and numpy array.")

# ======================================
# 5. Plot ROC & histogram
# ======================================
# --- ROC Curve ---
fpr, tpr, _ = roc_curve(y_true, errors)
plt.figure()
plt.plot(fpr, tpr, label=f"AUC = {auc:.3f}")
plt.plot([0, 1], [0, 1], '--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Autoencoder ROC Curve")
plt.legend()
plt.savefig("results/reports/ae_roc.png", dpi=300)
plt.close()
print("Saved ROC curve.")


Loading dataset...
Processing 211043 samples in batches of 5000...
Processed 5000/211043 samples...
Processed 10000/211043 samples...
Processed 15000/211043 samples...
Processed 20000/211043 samples...
Processed 25000/211043 samples...
Processed 30000/211043 samples...
Processed 35000/211043 samples...
Processed 40000/211043 samples...
Processed 45000/211043 samples...
Processed 50000/211043 samples...
Processed 55000/211043 samples...
Processed 60000/211043 samples...
Processed 65000/211043 samples...
Processed 70000/211043 samples...
Processed 75000/211043 samples...
Processed 80000/211043 samples...
Processed 85000/211043 samples...
Processed 90000/211043 samples...
Processed 95000/211043 samples...
Processed 100000/211043 samples...
Processed 105000/211043 samples...
Processed 110000/211043 samples...
Processed 115000/211043 samples...
Processed 120000/211043 samples...
Processed 125000/211043 samples...
Processed 130000/211043 samples...
Processed 135000/211043 samples...
Processe

In [13]:
# --- ROC Curve ---
fpr, tpr, _ = roc_curve(y_true, errors)
plt.figure()
plt.plot(fpr, tpr, label=f"AUC = {auc:.3f}")
plt.plot([0, 1], [0, 1], '--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("Autoencoder ROC Curve")
plt.legend()
plt.savefig("results/reports/ae_roc.png", dpi=300)
plt.close()   # ✅ fixed here

# --- Histogram ---
plt.figure(figsize=(8, 5))
plt.hist(errors[y_true == 0], bins=80, alpha=0.6, label="Normal")
plt.hist(errors[y_true == 1], bins=80, alpha=0.6, label="Attack")
plt.legend()
plt.title("AE Reconstruction Error Distribution")
plt.savefig("results/reports/ae_error_hist.png", dpi=300)
plt.close()   # ✅ fixed here
