import pandas as pd
from sklearn.model_selection import train_test_split
from src.constants import DATA_PATH, TARGET_COL, SEED
from src.train_supervised import train_supervised
from src.train_ae import train_autoencoder
from src.fusion import run_fusion
from src.evaluate import evaluate_and_save

# --- Load dataset ---
df = pd.read_csv(DATA_PATH)
X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

# --- Split ---
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

# --- Train supervised model ---
sup_pipeline = train_supervised()

# --- Train autoencoder ---
ae_model, preproc = train_autoencoder()

# --- Run fusion ---
preds = run_fusion(sup_pipeline, ae_model, X_val, y_val, preproc)

# --- Evaluate ---
evaluate_and_save(y_val, preds, name="hybrid_fusion")

In [2]:
import os
os.chdir(r"c:\Users\ASUS\OneDrive\Desktop\BTP PROJECT\BTP_Implementation\iiot-hybrid-detection")
print("CWD:", os.getcwd())

CWD: c:\Users\ASUS\OneDrive\Desktop\BTP PROJECT\BTP_Implementation\iiot-hybrid-detection


In [3]:
# fusion.ipynb

import os
import pandas as pd
from sklearn.model_selection import train_test_split
from src.constants import DATA_PATH, TARGET_COL, SEED
from src.train_supervised import train_supervised
from src.train_ae import train_autoencoder
from src.fusion import run_fusion
from src.evaluate import evaluate_and_save
import joblib

# Ensure results folder exists
os.makedirs("results/fusion", exist_ok=True)

# ================================
# STEP 1. Load Dataset
# ================================
print("Loading dataset...")
df = pd.read_csv(DATA_PATH)
df[TARGET_COL] = df[TARGET_COL].astype(str).str.lower()

X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

print("Dataset shape:", df.shape)
print("Class distribution:\n", y.value_counts())

# ================================
# STEP 2. Split Data
# ================================
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=SEED
)

print("Training set:", X_train.shape, "Validation set:", X_val.shape)

# ================================
# STEP 3. Train or Load Supervised Model
# ================================
SUP_MODEL_PATH = "results/models/supervised_pipeline.pkl"

if os.path.exists(SUP_MODEL_PATH):
    print("Loading existing supervised model...")
    sup_pipeline = joblib.load(SUP_MODEL_PATH)
else:
    print("Training supervised model...")
    sup_pipeline = train_supervised()
    joblib.dump(sup_pipeline, SUP_MODEL_PATH)
    print("Supervised model saved at", SUP_MODEL_PATH)

# ================================
# STEP 4. Train or Load Autoencoder
# ================================
AE_MODEL_PATH = "results/models/autoencoder.pkl"
AE_PREPROC_PATH = "results/models/ae_preprocessor.pkl"

if os.path.exists(AE_MODEL_PATH) and os.path.exists(AE_PREPROC_PATH):
    print("Loading existing autoencoder + preprocessor...")
    ae_model = joblib.load(AE_MODEL_PATH)
    ae_preproc = joblib.load(AE_PREPROC_PATH)
else:
    print("Training autoencoder...")
    ae_model, ae_preproc = train_autoencoder()
    joblib.dump(ae_model, AE_MODEL_PATH)
    joblib.dump(ae_preproc, AE_PREPROC_PATH)
    print("Autoencoder + preprocessor saved.")

# ================================
# STEP 5. Run Fusion
# ================================
print("Running hybrid fusion...")
fusion_preds, fusion_scores = run_fusion(
    sup_pipeline=sup_pipeline,
    ae_model=ae_model,
    X_val=X_val,
    y_val=y_val,
    preproc=ae_preproc
)

# Save fusion outputs for analysis
fusion_results = pd.DataFrame({
    "True_Label": y_val.values,
    "Predicted_Label": fusion_preds,
    "Fusion_Score": fusion_scores
})

fusion_results.to_csv("results/fusion/fusion_predictions.csv", index=False)
print("Fusion predictions saved at results/fusion/fusion_predictions.csv")

# ================================
# STEP 6. Evaluate
# ================================
print("Evaluating hybrid fusion model...")
evaluate_and_save(y_val, fusion_preds, name="hybrid_fusion")

print("✅ Hybrid Fusion workflow complete! Check results/fusion/ for outputs.")


Loading dataset...
Dataset shape: (211043, 44)
Class distribution:
 type
normal        50000
backdoor      20000
ddos          20000
dos           20000
injection     20000
password      20000
scanning      20000
ransomware    20000
xss           20000
mitm           1043
Name: count, dtype: int64
Training set: (168834, 43) Validation set: (42209, 43)
Loading existing supervised model...
Loading existing autoencoder + preprocessor...
Running hybrid fusion...
Fusion predictions saved at results/fusion/fusion_predictions.csv
Evaluating hybrid fusion model...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0     0.0000    0.0000    0.0000       0.0
           1     0.0000    0.0000    0.0000       0.0
    backdoor     0.0000    0.0000    0.0000    4000.0
        ddos     0.0000    0.0000    0.0000    4000.0
         dos     0.0000    0.0000    0.0000    4000.0
   injection     0.0000    0.0000    0.0000    4000.0
        mitm     0.0000    0.0000    0.0000     209.0
      normal     0.0000    0.0000    0.0000   10000.0
    password     0.0000    0.0000    0.0000    4000.0
  ransomware     0.0000    0.0000    0.0000    4000.0
    scanning     0.0000    0.0000    0.0000    4000.0
         xss     0.0000    0.0000    0.0000    4000.0

    accuracy                         0.0000   42209.0
   macro avg     0.0000    0.0000    0.0000   42209.0
weighted avg     0.0000    0.0000    0.0000   42209.0

✅ Evaluation saved to results/eval/hybrid_fusion_classification_report.txt and PNG
✅ Hybrid Fusion workflow complete! Check results/f