# IDS Exploration Notebook
This notebook loads a demo dataset (KDDCup99), preprocesses it, and then trains and evaluates baseline models.

In [None]:
# Imports
import pandas as pd
import numpy as np
from pathlib import Path

# Local modules
from src.ids.data import load_dataset
from src.ids.preprocess import split_features_labels, train_val_test_split, make_preprocess_pipeline
from src.ids.models import build_rf
from src.ids.evaluate import classification_metrics, print_confusion

# Base directory for the notebook: two levels above this file's directory
# when run as a script (where `__file__` exists), otherwise the relative
# current directory.
if '__file__' in globals():
    ROOT = Path(__file__).resolve().parents[2]
else:
    ROOT = Path('.')
ROOT

In [None]:
# Load the demo dataset by name and preview its first rows.
dataset_name = 'kddcup99'
df = load_dataset(name=dataset_name)
df.head()

In [None]:
# Separate features from the label column, then partition into
# train / validation / test subsets.
X, y = split_features_labels(df, label_col='binary_label')
splits = train_val_test_split(X, y)
X_train, X_val, X_test, y_train, y_val, y_test = splits

# The preprocessing pipeline is built from the training features only.
preprocess = make_preprocess_pipeline(X_train)

In [None]:
# Train a RandomForest baseline and evaluate it on the test split.
import warnings

from sklearn.pipeline import Pipeline

rf = build_rf()
# Chain preprocessing and the classifier so that a single fit/predict call
# runs both steps; the pipeline is fitted on the training split.
pipe = Pipeline(steps=[('pre', preprocess), ('clf', rf)])
pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)

# Probability scores are best-effort: if they cannot be produced, metrics
# are still computed from the hard predictions. The failure is reported
# through a warning instead of being silently discarded, so a missing
# probability-based metric can be traced back to its cause.
try:
    # Column 1 of the probability matrix — presumably the positive class
    # of the binary label; confirm the classifier's class ordering.
    y_proba = pipe.predict_proba(X_test)[:, 1]
except Exception as exc:
    warnings.warn(
        f"predict_proba failed ({exc!r}); evaluating without probability scores",
        RuntimeWarning,
    )
    y_proba = None

metrics = classification_metrics(y_test, y_pred, y_proba)
cm = print_confusion(y_test, y_pred)
metrics, cm