In [1]:
# ====================================================================================================
# CELL 1: Imports & Setup
# ====================================================================================================

import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import joblib
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Machine Learning Libraries
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (classification_report, confusion_matrix, 
                           accuracy_score, precision_score, recall_score, f1_score,
                           roc_auc_score, roc_curve)

# Deep Learning Libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

# Add custom modules
sys.path.append('../src')
from preprocess import load_and_preprocess_data

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Define paths
DATA_PATH = "../data/UNSW-NB15_full.csv"  # Your preprocessed dataset
MODELS_PATH = "../models/"
RESULTS_PATH = "../results/"

# Create directories if they don't exist
os.makedirs(MODELS_PATH, exist_ok=True)
os.makedirs(RESULTS_PATH, exist_ok=True)

print("‚úÖ Libraries imported successfully!")
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {len(tf.config.list_physical_devices('GPU')) > 0}")




ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

In [2]:
# ====================================================================================================
# CELL 2: Load Data
# ====================================================================================================

print("Loading preprocessed UNSW-NB15 dataset...")
print("=" * 50)

# üëâ DECISION: Choose task type
TASK_TYPE = "binary"  # Change to "multiclass" for attack category classification

# Load preprocessed data
try:
    X_processed, y, preprocessor = load_and_preprocess_data(DATA_PATH, task=TASK_TYPE)
    
    print(f"‚úÖ Dataset loaded successfully!")
    print(f"Features shape: {X_processed.shape}")
    print(f"Target shape: {y.shape}")
    
    # Class distribution
    if TASK_TYPE == "binary":
        print(f"\nBinary Classification Target Distribution:")
        print(f"Benign (0): {sum(y == 0)}")
        print(f"Attack (1): {sum(y == 1)}")
        print(f"Attack percentage: {sum(y == 1) / len(y) * 100:.2f}%")
    else:
        print(f"\nMulticlass Target Distribution:")
        print(y.value_counts())
        
except FileNotFoundError:
    print("‚ùå Preprocessed dataset not found!")
    print("Please run data preprocessing first or check the file path.")
    print("Expected file: UNSW-NB15_full.csv")
    
except Exception as e:
    print(f"‚ùå Error loading data: {str(e)}")

Loading preprocessed UNSW-NB15 dataset...
‚ùå Error loading data: name 'load_and_preprocess_data' is not defined
