perbaikan GIR di 4.3

## Import Library

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import (classification_report, confusion_matrix,
                            accuracy_score, precision_recall_fscore_support,
                            matthews_corrcoef)
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import joblib
import gc
import warnings
warnings.filterwarnings('ignore')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Tahapan 1: Data Loading & Preprocessing

**1.1 Load Dataset**

In [None]:
df = pd.read_parquet('/content/drive/My Drive/Dataset/CIC_IIoT_2025/final_dataset.parquet')

print(f"Dataset loaded successfully!")
print(f"  - Total rows: {df.shape[0]:,}")
print(f"  - Total columns: {df.shape[1]}")
print(f"  - Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

Dataset loaded successfully!
  - Total rows: 685,671
  - Total columns: 95
  - Memory usage: 3994.50 MB


**1.2 Drop Columns yang Tidak Perlu**

In [None]:
# Define columns to drop
drop_columns = [
    # Metadata (tidak predictive)
    'device_name', 'device_mac',
    'timestamp', 'timestamp_start', 'timestamp_end',

    # Labels (keep label2 only sebagai target)
    'label_full', 'label1', 'label3', 'label4',

    # List columns (pakai count saja)
    'log_data-types',
    'network_ips_all', 'network_ips_dst', 'network_ips_src',
    'network_macs_all', 'network_macs_dst', 'network_macs_src',
    'network_ports_all', 'network_ports_dst', 'network_ports_src',
    'network_protocols_all', 'network_protocols_dst', 'network_protocols_src'
]

In [None]:
# Verify all columns exist before dropping
existing_drop_cols = [col for col in drop_columns if col in df.columns]
missing_drop_cols = [col for col in drop_columns if col not in df.columns]

if missing_drop_cols:
    print(f"Warning: Columns not found in dataset: {missing_drop_cols}")

df = df.drop(columns=existing_drop_cols)

print(f"Dropped {len(existing_drop_cols)} columns")
print(f"  - Remaining columns: {df.shape[1]}")
print(f"  - Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")

Dropped 22 columns
  - Remaining columns: 73
  - Memory usage: 412.21 MB


**1.3 Mengkonversi Tipe Data ke Float32 & Int32 Memory Optimization**

In [None]:
initial_memory = df.memory_usage(deep=True).sum() / 1024**2

# Downcast integer columns (kecuali time_window dan label2)
int_cols = df.select_dtypes(include=['int64']).columns.tolist()
exclude_int = ['time_window', 'label2'] if 'label2' in int_cols else ['time_window']

for col in int_cols:
    if col not in exclude_int:
        # Check if values fit in int32
        col_min = df[col].min()
        col_max = df[col].max()

        if col_min >= np.iinfo(np.int32).min and col_max <= np.iinfo(np.int32).max:
            df[col] = df[col].astype('int32')
        else:
            print(f"   {col}: Range too large for int32, keeping int64")

In [None]:
# Downcast float columns
float_cols = df.select_dtypes(include=['float64']).columns.tolist()

for col in float_cols:
    df[col] = df[col].astype('float32')

final_memory = df.memory_usage(deep=True).sum() / 1024**2
memory_saved = initial_memory - final_memory
memory_reduction = (memory_saved / initial_memory) * 100

In [None]:
print(f"Memory optimization complete!")
print(f"  - Initial memory: {initial_memory:.2f} MB")
print(f"  - Final memory: {final_memory:.2f} MB")
print(f"  - Saved: {memory_saved:.2f} MB ({memory_reduction:.1f}% reduction)")

gc.collect()  # Force garbage collection

Memory optimization complete!
  - Initial memory: 412.21 MB
  - Final memory: 226.50 MB
  - Saved: 185.71 MB (45.1% reduction)


30

**1.4 Cek Kualitas Data**

1.4.1 Missing Values

In [None]:
# Check for missing values
missing_values = df.isnull().sum().sum()
print(f"  - Missing values: {missing_values}")

  - Missing values: 0


1.4.2 Infinite Values

In [None]:
# Check for infinite values in float columns
inf_count = 0
float_cols = df.select_dtypes(include=['float32']).columns
for col in float_cols:
    inf_count += np.isinf(df[col]).sum()

print(f"  - Infinite values: {inf_count}")

if inf_count > 0:
    print(f"Replacing infinite values with NaN...")
    df.replace([np.inf, -np.inf], np.nan, inplace=True)

    # Fill NaN with median
    for col in float_cols:
        if df[col].isnull().sum() > 0:
            df[col].fillna(df[col].median(), inplace=True)

    print(f"Infinite values handled")

  - Infinite values: 0


1.4.3 Remove All-Zero Rows

In [None]:
feature_cols = [col for col in df.columns if col not in ['label2', 'time_window']]
all_zero_mask = (df[feature_cols] == 0).all(axis=1)
zero_count = all_zero_mask.sum()

print(f" Found {zero_count:,} all-zero rows ({(zero_count/len(df)*100):.1f}%)")

if zero_count > 0:
    df = df[~all_zero_mask].copy()
    print(f"  Removed {zero_count:,} invalid rows")

 Found 136,798 all-zero rows (20.0%)
  Removed 136,798 invalid rows


1.4.4 Remove EXACT duplicates (same time_window)

In [None]:
feature_cols_with_tw = [col for col in df.columns if col not in ['label2']]
size_before = len(df)

df = df.drop_duplicates(subset=feature_cols_with_tw, keep='first')

exact_dup_removed = size_before - len(df)
print(f" Removed {exact_dup_removed:,} exact duplicates")
print(f"  Remaining: {len(df):,} rows")

 Removed 2,231 exact duplicates
  Remaining: 546,642 rows


1.4.5 Remove TEMPORAL duplicates (different time_window)

In [None]:
size_before_temporal = len(df)

# Get feature columns only (exclude label2 and time_window)
feature_cols = [col for col in df.columns if col not in ['label2', 'time_window']]

print(f"         Checking duplicates across {len(feature_cols)} features...")
print(f"         (This removes rows with identical features regardless of time_window)")

# Remove duplicates based on features only
df = df.drop_duplicates(subset=feature_cols, keep='first')

temporal_dup_removed = size_before_temporal - len(df)
print(f"         ✓ Removed {temporal_dup_removed:,} temporal duplicates")
print(f"         Remaining: {len(df):,} rows")

         Checking duplicates across 71 features...
         (This removes rows with identical features regardless of time_window)
         ✓ Removed 115,049 temporal duplicates
         Remaining: 431,593 rows


1.4.6 Final Class Distribution

In [None]:
for label in sorted(df['label2'].unique()):
    count = (df['label2'] == label).sum()
    pct = (count / len(df)) * 100
    print(f"         {label:12s}: {count:7,} ({pct:5.2f}%)")

print(f"\n  [Summary]")
print(f"    Original size:           {df.shape[0] + zero_count + exact_dup_removed + temporal_dup_removed:,}")
print(f"    After zero removal:      -{zero_count:,}")
print(f"    After exact dup removal: -{exact_dup_removed:,}")
print(f"    After temporal dup:      -{temporal_dup_removed:,}")
print(f"    ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")
print(f"    Final size:              {len(df):,} rows")
print(f"    Total removed:           {zero_count + exact_dup_removed + temporal_dup_removed:,} ({((zero_count + exact_dup_removed + temporal_dup_removed)/(len(df) + zero_count + exact_dup_removed + temporal_dup_removed)*100):.1f}%)")

gc.collect()

         benign      : 179,312 (41.55%)
         bruteforce  :   4,626 ( 1.07%)
         ddos        :  51,841 (12.01%)
         dos         :  55,469 (12.85%)
         malware     :  22,968 ( 5.32%)
         mitm        :  23,549 ( 5.46%)
         recon       :  86,163 (19.96%)
         web         :   7,665 ( 1.78%)

  [Summary]
    Original size:           685,671
    After zero removal:      -136,798
    After exact dup removal: -2,231
    After temporal dup:      -115,049
    ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
    Final size:              431,593 rows
    Total removed:           254,078 (37.1%)


0

**1.5 Train/Test Split (Time-Based)**

In [None]:
train_windows = [1, 2, 3, 4, 5, 6, 7, 8]
test_windows = [9, 10]

train_df = df[df['time_window'].isin(train_windows)].copy()
test_df = df[df['time_window'].isin(test_windows)].copy()

print(f"  ✓ Split complete!")
print(f"    Train: {len(train_df):,} rows ({(len(train_df)/len(df)*100):.1f}%)")
print(f"    Test:  {len(test_df):,} rows ({(len(test_df)/len(df)*100):.1f}%)")

print(f"\n  Train distribution:")
for label in sorted(train_df['label2'].unique()):
    count = (train_df['label2'] == label).sum()
    pct = (count / len(train_df)) * 100
    print(f"    {label:12s}: {count:7,} ({pct:5.2f}%)")

print(f"\n  Test distribution:")
for label in sorted(test_df['label2'].unique()):
    count = (test_df['label2'] == label).sum()
    pct = (count / len(test_df)) * 100
    print(f"    {label:12s}: {count:7,} ({pct:5.2f}%)")

# Cleanup
del df
gc.collect()

  ✓ Split complete!
    Train: 386,274 rows (89.5%)
    Test:  45,319 rows (10.5%)

  Train distribution:
    benign      : 163,578 (42.35%)
    bruteforce  :   4,055 ( 1.05%)
    ddos        :  45,744 (11.84%)
    dos         :  49,107 (12.71%)
    malware     :  20,195 ( 5.23%)
    mitm        :  20,799 ( 5.38%)
    recon       :  76,067 (19.69%)
    web         :   6,729 ( 1.74%)

  Test distribution:
    benign      :  15,734 (34.72%)
    bruteforce  :     571 ( 1.26%)
    ddos        :   6,097 (13.45%)
    dos         :   6,362 (14.04%)
    malware     :   2,773 ( 6.12%)
    mitm        :   2,750 ( 6.07%)
    recon       :  10,096 (22.28%)
    web         :     936 ( 2.07%)


0

Verify NO LEAKAGE

In [None]:
feature_cols = [col for col in train_df.columns if col not in ['label2', 'time_window']]

test_hash = test_df[feature_cols].apply(lambda x: hash(tuple(x)), axis=1)
train_hash = train_df[feature_cols].apply(lambda x: hash(tuple(x)), axis=1)

overlap = test_hash.isin(train_hash).sum()
overlap_pct = (overlap / len(test_df)) * 100

print(f"Test rows with identical features in train: {overlap} / {len(test_df)} ({overlap_pct:.2f}%)")

if overlap == 0:
    print(f" SUCCESS! NO DATA LEAKAGE DETECTED!")
    print(f"   Dataset is ready for unbiased evaluation!")
else:
    print(f" WARNING: Still {overlap_pct:.2f}% leakage!")
    print(f"   Need further investigation...")

Test rows with identical features in train: 0 / 45319 (0.00%)
 SUCCESS! NO DATA LEAKAGE DETECTED!
   Dataset is ready for unbiased evaluation!


In [None]:
print("Tahapan 1 Complete - Data is CLEAN and SPLIT!")

Tahapan 1 Complete - Data is CLEAN and SPLIT!


In [None]:
# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

##Tahapan 2: Exploratory Data Analysis (EDA) ##

**2.1 Class Distribution Analysis**

In [None]:
def plot_class_distribution(train_df, test_df, save_path='class_distribution.png'):
    """Plot class distribution for train and test sets"""
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Train distribution
    train_counts = train_df['label2'].value_counts().sort_index()
    axes[0].bar(range(len(train_counts)), train_counts.values,
                color='steelblue', edgecolor='black')
    axes[0].set_xticks(range(len(train_counts)))
    axes[0].set_xticklabels(train_counts.index, rotation=45, ha='right')
    axes[0].set_ylabel('Count')
    axes[0].set_title('Train Set Class Distribution')
    axes[0].grid(axis='y', alpha=0.3)

    # Add percentage labels
    for i, v in enumerate(train_counts.values):
        pct = (v / len(train_df)) * 100
        axes[0].text(i, v, f'{pct:.1f}%', ha='center', va='bottom', fontsize=9)

    # Test distribution
    test_counts = test_df['label2'].value_counts().sort_index()
    axes[1].bar(range(len(test_counts)), test_counts.values,
                color='coral', edgecolor='black')
    axes[1].set_xticks(range(len(test_counts)))
    axes[1].set_xticklabels(test_counts.index, rotation=45, ha='right')
    axes[1].set_ylabel('Count')
    axes[1].set_title('Test Set Class Distribution')
    axes[1].grid(axis='y', alpha=0.3)

    # Add percentage labels
    for i, v in enumerate(test_counts.values):
        pct = (v / len(test_df)) * 100
        axes[1].text(i, v, f'{pct:.1f}%', ha='center', va='bottom', fontsize=9)

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"   Saved: {save_path}")
    plt.close()

    # Print imbalance ratio
    print("\n   Imbalance Ratios (majority:minority):")
    max_count = train_counts.max()
    for label, count in train_counts.items():
        ratio = max_count / count
        print(f"      {label:12s}: 1:{ratio:.2f}")

In [None]:
# Call function
plot_class_distribution(train_df, test_df)

   Saved: class_distribution.png

   Imbalance Ratios (majority:minority):
      benign      : 1:1.00
      bruteforce  : 1:40.34
      ddos        : 1:3.58
      dos         : 1:3.33
      malware     : 1:8.10
      mitm        : 1:7.86
      recon       : 1:2.15
      web         : 1:24.31


**2.2 Feature Correlation**

In [None]:
def analyze_feature_correlation(train_df, top_n=20, save_path='feature_correlation.png'):
    """Analyze correlation between features"""
    # Get numeric features only
    feature_cols = [col for col in train_df.columns
                   if col not in ['label2', 'time_window']]

    # Sample for memory efficiency (10k rows)
    sample_df = train_df[feature_cols].sample(n=min(10000, len(train_df)),
                                               random_state=42)

    # Calculate correlation matrix
    corr_matrix = sample_df.corr()

    # Get top correlated pairs
    corr_pairs = []
    for i in range(len(corr_matrix.columns)):
        for j in range(i+1, len(corr_matrix.columns)):
            corr_pairs.append((
                corr_matrix.columns[i],
                corr_matrix.columns[j],
                abs(corr_matrix.iloc[i, j])
            ))

    # Sort by correlation
    corr_pairs.sort(key=lambda x: x[2], reverse=True)

    print(f"Top {top_n} Highly Correlated Feature Pairs:")
    for feat1, feat2, corr_val in corr_pairs[:top_n]:
        print(f"      {feat1:40s} <-> {feat2:40s}: {corr_val:.3f}")

    # Plot top correlations
    top_features = set()
    for feat1, feat2, _ in corr_pairs[:top_n]:
        top_features.add(feat1)
        top_features.add(feat2)

    top_features = list(top_features)[:30]  # Limit to 30 features

    plt.figure(figsize=(14, 12))
    sns.heatmap(corr_matrix.loc[top_features, top_features],
                cmap='coolwarm', center=0,
                square=True, linewidths=0.5,
                cbar_kws={"shrink": 0.8})
    plt.title(f'Feature Correlation Heatmap (Top {len(top_features)} Features)')
    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"   Saved: {save_path}")
    plt.close()

    del sample_df, corr_matrix
    gc.collect()

# Call function
analyze_feature_correlation(train_df)

Top 20 Highly Correlated Feature Pairs:
      network_macs_dst_count                   <-> network_macs_src_count                  : 1.000
      network_header-length_avg                <-> network_header-length_min               : 1.000
      network_mss_avg                          <-> network_mss_max                         : 1.000
      network_header-length_avg                <-> network_header-length_max               : 1.000
      network_header-length_max                <-> network_header-length_min               : 1.000
      network_ip-length_max                    <-> network_packet-size_max                 : 0.999
      network_ips_all_count                    <-> network_ips_dst_count                   : 0.999
      network_ip-length_max                    <-> network_payload-length_max              : 0.997
      network_packet-size_max                  <-> network_payload-length_max              : 0.997
      log_data-ranges_avg                      <-> log_data-ranges_ma

**2.3 Feature Group Statistics**

In [None]:
def analyze_feature_groups(train_df):
    """Analyze statistics per feature group"""

    # Define feature groups based on CIC-IIoT-2025 dataset
    feature_groups = {
        'log_stats': [col for col in train_df.columns if col.startswith('log_')],
        'packet_rate': [col for col in train_df.columns
                       if 'interval' in col or 'packets_' in col and '_count' in col],
        'size_length': [col for col in train_df.columns
                       if any(x in col for x in ['length', 'size', 'mss'])],
        'tcp_flags': [col for col in train_df.columns if 'tcp-flags' in col],
        'ip_flags': [col for col in train_df.columns if 'ip-flags' in col],
        'address_diversity': [col for col in train_df.columns
                             if any(x in col for x in ['ips_', 'macs_'])],
        'network_multiplexing': [col for col in train_df.columns
                                if any(x in col for x in ['ports_', 'protocols_'])],
        'timing_control': [col for col in train_df.columns
                          if any(x in col for x in ['time-delta', 'ttl', 'window-size'])],
        'fragmentation': [col for col in train_df.columns if 'fragment' in col]
    }

    print("Feature Group Summary:")
    group_stats = {}
    for group_name, features in feature_groups.items():
        features = [f for f in features if f in train_df.columns]
        if len(features) > 0:
            group_data = train_df[features]
            stats = {
                'count': len(features),
                'mean_avg': group_data.mean().mean(),
                'mean_std': group_data.std().mean(),
                'missing_pct': (group_data.isnull().sum().sum() /
                               (len(group_data) * len(features))) * 100
            }
            group_stats[group_name] = stats
            print(f"      {group_name:25s}: {stats['count']:2d} features | "
                  f"Avg: {stats['mean_avg']:8.2f} | "
                  f"Std: {stats['mean_std']:8.2f}")

    return feature_groups, group_stats

# Call function
feature_groups, group_stats = analyze_feature_groups(train_df)

Feature Group Summary:
      log_stats                :  7 features | Avg:    69.57 | Std:   173.79
      packet_rate              :  5 features | Avg: 14585.04 | Std: 49261.75
      size_length              : 24 features | Avg:  3104.45 | Std:  3014.41
      tcp_flags                : 10 features | Avg:  2781.08 | Std: 23655.68
      ip_flags                 :  4 features | Avg:     0.74 | Std:     0.62
      address_diversity        :  6 features | Avg:     6.09 | Std:    13.75
      network_multiplexing     :  6 features | Avg:  2089.34 | Std:  6647.40
      timing_control           : 12 features | Avg:  5922.98 | Std:  5503.67
      fragmentation            :  2 features | Avg:   635.05 | Std:  3126.69


**2.4 Temporal Pattern Analysis**

In [None]:
def analyze_temporal_patterns(train_df, test_df, save_path='temporal_patterns.png'):
    """Analyze attack patterns across time windows"""

    fig, axes = plt.subplots(2, 1, figsize=(14, 10))

    # Train temporal distribution
    train_temporal = train_df.groupby(['time_window', 'label2']).size().unstack(fill_value=0)
    train_temporal.plot(kind='bar', stacked=True, ax=axes[0],
                       colormap='tab10', edgecolor='black', linewidth=0.5)
    axes[0].set_title('Train Set: Attack Distribution Across Time Windows')
    axes[0].set_xlabel('Time Window')
    axes[0].set_ylabel('Count')
    axes[0].legend(title='Attack Type', bbox_to_anchor=(1.05, 1), loc='upper left')
    axes[0].grid(axis='y', alpha=0.3)

    # Test temporal distribution
    test_temporal = test_df.groupby(['time_window', 'label2']).size().unstack(fill_value=0)
    test_temporal.plot(kind='bar', stacked=True, ax=axes[1],
                      colormap='tab10', edgecolor='black', linewidth=0.5)
    axes[1].set_title('Test Set: Attack Distribution Across Time Windows')
    axes[1].set_xlabel('Time Window')
    axes[1].set_ylabel('Count')
    axes[1].legend(title='Attack Type', bbox_to_anchor=(1.05, 1), loc='upper left')
    axes[1].grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"   Saved: {save_path}")
    plt.close()

    # Print temporal statistics
    print("\n   Temporal Statistics:")
    print(f"      Train windows: {sorted(train_df['time_window'].unique())}")
    print(f"      Test windows:  {sorted(test_df['time_window'].unique())}")

    for label in sorted(train_df['label2'].unique()):
        train_windows = train_df[train_df['label2'] == label]['time_window'].value_counts()
        print(f"\n      {label:12s}:")
        print(f"         Appears in {len(train_windows)} windows")
        print(f"         Avg per window: {train_windows.mean():.0f}")
        print(f"         Std: {train_windows.std():.0f}")

# Call function
analyze_temporal_patterns(train_df, test_df)

   Saved: temporal_patterns.png

   Temporal Statistics:
      Train windows: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8)]
      Test windows:  [np.int64(9), np.int64(10)]

      benign      :
         Appears in 8 windows
         Avg per window: 20447
         Std: 19494

      bruteforce  :
         Appears in 8 windows
         Avg per window: 507
         Std: 407

      ddos        :
         Appears in 8 windows
         Avg per window: 5718
         Std: 4769

      dos         :
         Appears in 8 windows
         Avg per window: 6138
         Std: 5058

      malware     :
         Appears in 8 windows
         Avg per window: 2524
         Std: 2040

      mitm        :
         Appears in 8 windows
         Avg per window: 2600
         Std: 2092

      recon       :
         Appears in 8 windows
         Avg per window: 9508
         Std: 7890

      web         :
         Appears in 8 windows
         Avg per 

In [None]:
print("Tahapan 2 Complete!")
gc.collect()

Tahapan 2 Complete!


21467

## Tahapan 3: Feature Engineering

**3.1 Time Window Features**

In [None]:
def create_lightweight_temporal_features(df):
    """
    Create lightweight temporal features
    Only add 5 aggregate features to minimize overhead
    """
    print("   Creating temporal aggregate features...")

    # Sort by time_window and label
    df = df.sort_values(['label2', 'time_window']).reset_index(drop=True)

    # Get numeric columns (exclude label and time_window)
    numeric_cols = [col for col in df.select_dtypes(include=['float32', 'int32']).columns
                   if col not in ['label2', 'time_window']]

    # Create 5 aggregate features only
    temporal_features = []

    # 1. Trend indicator (3-window moving average)
    df['trend_indicator'] = df.groupby('label2')[numeric_cols[0]]\
        .rolling(window=3, min_periods=1).mean()\
        .reset_index(0, drop=True).astype('float32')
    temporal_features.append('trend_indicator')

    # 2. Volatility indicator (3-window moving std)
    df['volatility_indicator'] = df.groupby('label2')[numeric_cols[0]]\
        .rolling(window=3, min_periods=1).std()\
        .reset_index(0, drop=True).fillna(0).astype('float32')
    temporal_features.append('volatility_indicator')

    # 3-5. Rolling stats for top 3 important features (we'll use first 3 numeric)
    for i, col in enumerate(numeric_cols[:3], 1):
        feat_name = f'roll_mean_{i}'
        df[feat_name] = df.groupby('label2')[col]\
            .rolling(window=3, min_periods=1).mean()\
            .reset_index(0, drop=True).astype('float32')
        temporal_features.append(feat_name)

    print(f"   Created {len(temporal_features)} temporal features")
    print(f"   New shape: {df.shape}")

    return df, temporal_features

# Apply to train and test (OPTIONAL - uncomment if you want to use)
# train_df, temporal_features = create_lightweight_temporal_features(train_df)
# test_df, _ = create_lightweight_temporal_features(test_df)

print("   [SKIPPED] Temporal features are optional for memory efficiency")
temporal_features = []  # Empty list if not used

   [SKIPPED] Temporal features are optional for memory efficiency


**3.2 Feature Scaling/Normalization**

In [None]:
def normalize_features(train_df, test_df, exclude_cols=['label2', 'time_window']):
    """
    Normalize features using StandardScaler
    Fit on train, transform both train and test
    """
    print("   Normalizing features...")

    # Get feature columns
    feature_cols = [col for col in train_df.columns if col not in exclude_cols]

    # Initialize scaler
    scaler = StandardScaler()

    # Fit on train
    train_df[feature_cols] = scaler.fit_transform(train_df[feature_cols])

    # Transform test
    test_df[feature_cols] = scaler.transform(test_df[feature_cols])

    print(f"   Normalized {len(feature_cols)} features")
    print(f"   Scaler mean: {scaler.mean_[:5]}")  # Show first 5
    print(f"   Scaler std:  {scaler.scale_[:5]}")

    return train_df, test_df, scaler, feature_cols

# Apply normalization
train_df, test_df, scaler, feature_cols = normalize_features(train_df, test_df)

   Normalizing features...
   Normalized 71 features
   Scaler mean: [93.03921535 95.34549739 89.46821287  1.87706113  0.44116353]
   Scaler std:  [261.98432792 264.64097313 255.84577702  19.74976552   0.63967933]


**3.3 Feature Validation**

In [None]:
def validate_features(train_df, test_df, feature_cols):
    """Validate features after engineering"""

    print("   Post-engineering validation:")

    # Check for NaN
    train_nan = train_df[feature_cols].isnull().sum().sum()
    test_nan = test_df[feature_cols].isnull().sum().sum()
    print(f"      Train NaN: {train_nan}")
    print(f"      Test NaN:  {test_nan}")

    # Check for Inf
    train_inf = np.isinf(train_df[feature_cols].select_dtypes(include=['float32'])).sum().sum()
    test_inf = np.isinf(test_df[feature_cols].select_dtypes(include=['float32'])).sum().sum()
    print(f"      Train Inf: {train_inf}")
    print(f"      Test Inf:  {test_inf}")

    # Check value ranges
    print(f"\n   Value ranges (first 5 features):")
    for col in feature_cols[:5]:
        print(f"      {col:40s}: [{train_df[col].min():.3f}, {train_df[col].max():.3f}]")

    print(f"\n   Features validated successfully")

# Call validation
validate_features(train_df, test_df, feature_cols)

   Post-engineering validation:
      Train NaN: 0
      Test NaN:  0
      Train Inf: 0.0
      Test Inf:  0.0

   Value ranges (first 5 features):
      log_data-ranges_avg                     : [-0.355, 7.279]
      log_data-ranges_max                     : [-0.360, 7.197]
      log_data-ranges_min                     : [-0.353, 7.468]
      log_data-ranges_std_deviation           : [-0.095, 41.839]
      log_data-types_count                    : [-0.690, 2.437]

   Features validated successfully


**3.4 Pelatihan Model Baseline untuk Benchmark Performa**

In [None]:
print("Memulai pelatihan model baseline untuk mendapatkan F1-Score benchmark...")

# Siapkan data original (sebelum augmentasi)
X_train_original = train_df[feature_cols].values
# Initialize LabelEncoder
le = LabelEncoder()
y_train_original_encoded = le.fit_transform(train_df['label2']) # Gunakan LabelEncoder di sini
X_test_original = test_df[feature_cols].values
y_test_original_encoded = le.transform(test_df['label2'])

# Latih model baseline yang cepat, misalnya RandomForest
print("  - Melatih RandomForest baseline...")
baseline_model = RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1)
baseline_model.fit(X_train_original, y_train_original_encoded)
baseline_pred = baseline_model.predict(X_test_original)

# Hitung dan simpan F1-score per kelas
print("  - Menghitung F1-Score baseline...")
baseline_report = classification_report(y_test_original_encoded, baseline_pred,
                                       target_names=le.classes_,
                                       output_dict=True)

baseline_f1_scores = {cls: baseline_report[cls]['f1-score']
                      for cls in le.classes_}

print("\nSkor F1-Score Baseline yang akan digunakan untuk A-FIGS:")
for cls, f1 in baseline_f1_scores.items():
    print(f"    {cls:12s}: {f1:.4f}")

gc.collect()

Memulai pelatihan model baseline untuk mendapatkan F1-Score benchmark...
  - Melatih RandomForest baseline...
  - Menghitung F1-Score baseline...

Skor F1-Score Baseline yang akan digunakan untuk A-FIGS:
    benign      : 0.9952
    bruteforce  : 1.0000
    ddos        : 0.9968
    dos         : 0.9974
    malware     : 0.9962
    mitm        : 0.9993
    recon       : 0.9931
    web         : 0.9995


48

In [None]:
# Confusion Matrix Base line
print("Confusion Matrix Baseline:")
print(classification_report(y_test_original_encoded, baseline_pred, target_names=le.classes_))

Confusion Matrix Baseline:
              precision    recall  f1-score   support

      benign       0.99      1.00      1.00     15734
  bruteforce       1.00      1.00      1.00       571
        ddos       1.00      0.99      1.00      6097
         dos       1.00      1.00      1.00      6362
     malware       1.00      1.00      1.00      2773
        mitm       1.00      1.00      1.00      2750
       recon       1.00      0.99      0.99     10096
         web       1.00      1.00      1.00       936

    accuracy                           1.00     45319
   macro avg       1.00      1.00      1.00     45319
weighted avg       1.00      1.00      1.00     45319



In [None]:
# Get feature columns
feature_cols = [col for col in train_df.columns if col not in ['label2', 'time_window']]

# Calculate hash for train and test
print(f"\nCalculating feature hashes...")
print(f"  Train samples: {len(train_df):,}")
print(f"  Test samples:  {len(test_df):,}")

test_hash = test_df[feature_cols].apply(lambda x: hash(tuple(x)), axis=1)
train_hash = train_df[feature_cols].apply(lambda x: hash(tuple(x)), axis=1)

# Check overlap
overlap = test_hash.isin(train_hash).sum()
overlap_pct = (overlap / len(test_df)) * 100

print(f"\nResults:")
print(f"  Test rows with same features in train: {overlap:,} / {len(test_df):,}")
print(f"  Leakage percentage: {overlap_pct:.2f}%")

if overlap == 0:
    print(f"\n✅ SUCCESS! NO DATA LEAKAGE!")
elif overlap_pct < 1.0:
    print(f"\n⚠️  Minor leakage detected ({overlap_pct:.2f}%)")
    print(f"   This is acceptable (< 1%)")
else:
    print(f"\n🚨 SIGNIFICANT LEAKAGE! ({overlap_pct:.2f}%)")
    print(f"   Need to investigate!")

# Show leakage distribution by class
if overlap > 0:
    print(f"\nLeakage by class:")
    leaked_indices = test_df.index[test_hash.isin(train_hash)]
    leaked_df = test_df.loc[leaked_indices]

    for label in sorted(leaked_df['label2'].unique()):
        count = (leaked_df['label2'] == label).sum()
        total = (test_df['label2'] == label).sum()
        pct = (count / total) * 100
        print(f"  {label:12s}: {count:5,} / {total:5,} ({pct:5.1f}%)")


Calculating feature hashes...
  Train samples: 386,274
  Test samples:  45,319

Results:
  Test rows with same features in train: 0 / 45,319
  Leakage percentage: 0.00%

✅ SUCCESS! NO DATA LEAKAGE!


In [None]:
# ============================================================================
# HARDER BASELINE TEST - Add Noise
# ============================================================================
print("\n[Testing] Baseline with noisy test set...")

# Add small random noise to test set
np.random.seed(42)
# Use X_test_original which was defined earlier
X_test_noisy = X_test_original.copy()

# Add 5% Gaussian noise
noise_level = 0.05
for i in range(X_test_noisy.shape[1]):
    # Calculate noise based on the original test set's standard deviation
    noise = np.random.normal(0, X_test_original[:, i].std() * noise_level, X_test_noisy.shape[0])
    X_test_noisy[:, i] = X_test_noisy[:, i] + noise

# Test baseline on noisy data
baseline_pred_noisy = baseline_model.predict(X_test_noisy)

# Calculate metrics
from sklearn.metrics import classification_report
# Use y_test_original_encoded which was defined earlier
print("\nBaseline Performance on NOISY Test Set:")
print(classification_report(y_test_original_encoded, baseline_pred_noisy, target_names=le.classes_))


[Testing] Baseline with noisy test set...

Baseline Performance on NOISY Test Set:
              precision    recall  f1-score   support

      benign       0.89      0.76      0.82     15734
  bruteforce       0.42      0.23      0.30       571
        ddos       0.55      0.95      0.70      6097
         dos       0.48      0.48      0.48      6362
     malware       0.77      0.76      0.77      2773
        mitm       0.72      0.18      0.29      2750
       recon       0.83      0.91      0.87     10096
         web       0.96      0.36      0.52       936

    accuracy                           0.73     45319
   macro avg       0.70      0.58      0.59     45319
weighted avg       0.75      0.73      0.72     45319



In [None]:
# ============================================================================
# Cross-Validation on Training Set
# ============================================================================
from sklearn.model_selection import cross_val_score

print("\n[Cross-Validation] 5-Fold CV on training set...")

cv_scores = cross_val_score(
    RandomForestClassifier(n_estimators=50, random_state=42, n_jobs=-1),
    X_train_original,
    y_train_original_encoded, # Use y_train_original_encoded
    cv=5,
    scoring='f1_macro'
)

print(f"CV F1-Scores: {cv_scores}")
print(f"Mean F1: {cv_scores.mean():.4f} (+/- {cv_scores.std():.4f})")


[Cross-Validation] 5-Fold CV on training set...
CV F1-Scores: [0.9496531  0.90647012 0.97516816 0.98736327 0.96190946]
Mean F1: 0.9561 (+/- 0.0279)


In [None]:
# ============================================================================
# SANITY CHECK - Shuffle Test
# ============================================================================
print("\n[Sanity Check] Testing with shuffled labels...")

# Shuffle test labels (should get ~12.5% accuracy for 8 classes)
y_test_shuffled = np.random.permutation(y_test_original_encoded)

baseline_acc_shuffled = accuracy_score(y_test_shuffled, baseline_pred)
print(f"Accuracy with shuffled labels: {baseline_acc_shuffled:.4f}")
print(f"Expected (random): ~{1/len(le.classes_):.4f}")

if baseline_acc_shuffled > 0.2:
    print("⚠️  WARNING: Accuracy too high with shuffled labels!")
    print("   This suggests overfitting or data leakage!")
else:
    print("✓ Sanity check passed")


[Sanity Check] Testing with shuffled labels...
Accuracy with shuffled labels: 0.2136
Expected (random): ~0.1250
   This suggests overfitting or data leakage!


In [None]:
print("Tahapan 3 Complete!")
gc.collect()

Tahapan 3 Complete!


0

## Tahapan 4: GIR Calculation

**4.1 Define Severity Weights**

In [None]:
# IIoT severity weights based on impact analysis
severity_weights = {
    'malware': 5.0,      # Highest: Persistent threat, data exfiltration, Stuxnet-like
    'dos': 4.5,          # Safety-critical: Can halt industrial operations
    'mitm': 4.5,         # Data integrity: Sensor data manipulation
    'ddos': 4.0,         # Infrastructure: Network availability
    'bruteforce': 3.5,   # Credential: Lateral movement risk
    'web': 3.0,          # Application-layer: Less critical in IIoT
    'recon': 2.5,        # Pre-attack: Reconnaissance phase
    'benign': 1.0        # Normal traffic
}

print("   IIoT Severity Weights (Higher = More Critical):")
for label, weight in sorted(severity_weights.items(), key=lambda x: x[1], reverse=True):
    print(f"      {label:12s}: {weight:.1f}")

   IIoT Severity Weights (Higher = More Critical):
      malware     : 5.0
      dos         : 4.5
      mitm        : 4.5
      ddos        : 4.0
      bruteforce  : 3.5
      web         : 3.0
      recon       : 2.5
      benign      : 1.0


**4.2 Calculate GIR per Class**

In [None]:
def calculate_adaptive_gir(train_df, severity_weights):
    """
    Calculate Adaptive GIR with severity weighting

    Formula: GIR = (w_severity × n_maj) / (1.0 × n_min)

    Unlike FIGS original (fixed w_min=2), A-FIGS uses dynamic severity weights
    """
    class_counts = train_df['label2'].value_counts()
    n_maj = class_counts.max()  # Majority class count

    gir_values = {}
    for label in class_counts.index:
        n_min = class_counts[label]
        w_severity = severity_weights.get(label, 1.0)

        # A-FIGS Formula
        gir = (w_severity * n_maj) / (1.0 * n_min)
        gir_values[label] = gir

    return gir_values

In [None]:
# Calculate GIR
gir_values = calculate_adaptive_gir(train_df, severity_weights)

print("Adaptive GIR Values:")
for label, gir in sorted(gir_values.items(), key=lambda x: x[1], reverse=True):
    count = (train_df['label2'] == label).sum()
    print(f"      {label:12s}: GIR={gir:8.2f} (n={count:7,}, w={severity_weights[label]:.1f})")

Adaptive GIR Values:
      bruteforce  : GIR=  189.56 (n=  4,619, w=3.5)
      web         : GIR=  105.02 (n=  7,146, w=3.0)
      malware     : GIR=   60.29 (n= 20,747, w=5.0)
      mitm        : GIR=   52.38 (n= 21,493, w=4.5)
      dos         : GIR=   22.59 (n= 49,841, w=4.5)
      ddos        : GIR=   21.05 (n= 47,529, w=4.0)
      recon       : GIR=    7.39 (n= 84,591, w=2.5)
      benign      : GIR=    1.00 (n=250,160, w=1.0)


**4.3 Categorize (Plentiful/Limited/Sparse) Conservative GIR Thresholds**

In [None]:
def categorize_by_gir_conservative(gir_values, baseline_f1_scores):
    """
    Categorization konservatif:
    - Hanya augmentasi jika F1 < 0.95 (performa buruk)
    - Gunakan ambang batas persentil yang lebih ketat
    """
    gir_list = list(gir_values.values())

    # Ambang batas lebih ketat: 20% dan 50%
    p20 = np.percentile(gir_list, 20)
    p50 = np.percentile(gir_list, 50)

    categories = {}

    for label, gir in gir_values.items():
        # Cek performa baseline terlebih dahulu
        f1 = baseline_f1_scores.get(label, 0) # Ambil F1 score

        # Jika performa sudah bagus (F1 > 0.95), jangan augmentasi
        if f1 > 0.95:
            categories[label] = 'Plentiful'
            continue

        # Jika tidak, gunakan GIR
        if gir < p20:
            categories[label] = 'Plentiful'
        elif gir < p50:
            categories[label] = 'Limited'
        else:
            categories[label] = 'Sparse'

    return categories, p20, p50

In [None]:
# Kategorisasi dengan pendekatan konservatif
categories, p33, p67 = categorize_by_gir_conservative(gir_values, baseline_f1_scores) # p33 dan p67 sekarang adalah p20 dan p50

print(f"   GIR Percentiles (Konservatif):")
print(f"      20th percentile: {p20:.2f}") # Ganti label print
print(f"      50th percentile: {p50:.2f}") # Ganti label print

print(f"\n   Class Categories (Konservatif):")
for category_name in ['Plentiful', 'Limited', 'Sparse']:
    classes = [label for label, cat in categories.items() if cat == category_name]
    print(f"\n      {category_name}:")
    for label in classes:
        count = (train_df['label2'] == label).sum()
        f1 = baseline_f1_scores.get(label)
        print(f"         {label:12s} (n={count:7,}, GIR={gir_values[label]:6.2f}, F1={f1:.3f})")

# Store for later use
plentiful_classes = [label for label, cat in categories.items() if cat == 'Plentiful']
limited_classes = [label for label, cat in categories.items() if cat == 'Limited']
sparse_classes = [label for label, cat in categories.items() if cat == 'Sparse']

   GIR Percentiles (Konservatif):
      20th percentile: 12.86
      50th percentile: 37.48

   Class Categories (Konservatif):

      Plentiful:
         benign       (n=250,160, GIR=  1.00, F1=0.996)
         recon        (n= 84,591, GIR=  7.39, F1=0.992)
         dos          (n= 49,841, GIR= 22.59, F1=0.998)
         ddos         (n= 47,529, GIR= 21.05, F1=0.997)
         mitm         (n= 21,493, GIR= 52.38, F1=0.999)
         malware      (n= 20,747, GIR= 60.29, F1=0.996)
         web          (n=  7,146, GIR=105.02, F1=0.999)
         bruteforce   (n=  4,619, GIR=189.56, F1=1.000)

      Limited:

      Sparse:


**4.4 Visualization**

In [None]:
def plot_gir_analysis(gir_values, categories, severity_weights, save_path='gir_analysis.png'):
    """Visualize GIR analysis"""

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # 1. GIR values bar plot
    labels = list(gir_values.keys())
    girs = [gir_values[label] for label in labels]
    colors = ['green' if categories[label]=='Plentiful'
              else 'orange' if categories[label]=='Limited'
              else 'red' for label in labels]

    axes[0, 0].barh(labels, girs, color=colors, edgecolor='black')
    axes[0, 0].set_xlabel('GIR Value')
    axes[0, 0].set_title('Adaptive GIR per Class')
    axes[0, 0].grid(axis='x', alpha=0.3)
    axes[0, 0].axvline(p33, color='blue', linestyle='--', label='33rd percentile')
    axes[0, 0].axvline(p67, color='purple', linestyle='--', label='67th percentile')
    axes[0, 0].legend()

    # 2. Severity weights
    weights = [severity_weights[label] for label in labels]
    axes[0, 1].barh(labels, weights, color='steelblue', edgecolor='black')
    axes[0, 1].set_xlabel('Severity Weight')
    axes[0, 1].set_title('IIoT Severity Weights')
    axes[0, 1].grid(axis='x', alpha=0.3)

    # 3. Category distribution
    category_counts = {}
    for cat in ['Plentiful', 'Limited', 'Sparse']:
        category_counts[cat] = sum(1 for c in categories.values() if c == cat)

    axes[1, 0].pie(category_counts.values(), labels=category_counts.keys(),
                   autopct='%1.1f%%', colors=['green', 'orange', 'red'],
                   startangle=90, explode=[0.05, 0.05, 0.05])
    axes[1, 0].set_title('Class Category Distribution')

    # 4. GIR vs Sample Count
    counts = [len(train_df[train_df['label2']==label]) for label in labels]
    scatter_colors = [colors[i] for i in range(len(labels))]
    axes[1, 1].scatter(counts, girs, c=scatter_colors, s=150,
                      edgecolors='black', alpha=0.7)
    for i, label in enumerate(labels):
        axes[1, 1].annotate(label, (counts[i], girs[i]),
                           fontsize=9, ha='right')
    axes[1, 1].set_xlabel('Sample Count')
    axes[1, 1].set_ylabel('GIR Value')
    axes[1, 1].set_title('GIR vs Sample Count')
    axes[1, 1].grid(alpha=0.3)
    axes[1, 1].set_xscale('log')

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"   Saved: {save_path}")
    plt.close()

In [None]:
# Plot GIR analysis
plot_gir_analysis(gir_values, categories, severity_weights)

   Saved: gir_analysis.png


In [None]:
print("Tahapan 4 Complete!")
gc.collect()

Tahapan 4 Complete!


4062

In [None]:
# Set seeds
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

##Tahapan 5: Group-Based Feature Selection

**5.1 Define Feature Groups**

In [None]:
def define_iiot_feature_groups(train_df):
    """
    Define 9 IIoT-specific feature groups based on CIC-IIoT-2025
    """
    all_cols = [col for col in train_df.columns
                if col not in ['label2', 'time_window']]

    feature_groups = {
        'log_stats': [
            col for col in all_cols
            if col.startswith('log_') and 'interval' not in col
        ],

        'packet_rate': [
            col for col in all_cols
            if 'interval' in col or ('packets_' in col and '_count' in col)
        ],

        'size_length': [
            col for col in all_cols
            if any(x in col for x in ['length', 'size', 'mss', 'payload'])
               and 'window-size' not in col
        ],

        'tcp_flags': [
            col for col in all_cols if 'tcp-flags' in col
        ],

        'ip_flags': [
            col for col in all_cols if 'ip-flags' in col
        ],

        'address_diversity': [
            col for col in all_cols
            if any(x in col for x in ['ips_', 'macs_']) and '_count' in col
        ],

        'network_multiplexing': [
            col for col in all_cols
            if any(x in col for x in ['ports_', 'protocols_']) and '_count' in col
        ],

        'timing_control': [
            col for col in all_cols
            if any(x in col for x in ['time-delta', 'ttl', 'window-size'])
        ],

        'fragmentation': [
            col for col in all_cols if 'fragment' in col
        ]
    }

    # Print summary
    print("Feature Groups Defined:")
    total_features = 0
    for group_name, features in feature_groups.items():
        print(f"      {group_name:25s}: {len(features):2d} features")
        total_features += len(features)

    print(f"\nTotal features: {total_features}")

    return feature_groups

# Define groups
feature_groups = define_iiot_feature_groups(train_df)

Feature Groups Defined:
      log_stats                :  6 features
      packet_rate              :  5 features
      size_length              : 20 features
      tcp_flags                : 10 features
      ip_flags                 :  4 features
      address_diversity        :  6 features
      network_multiplexing     :  6 features
      timing_control           : 12 features
      fragmentation            :  2 features

Total features: 71


In [None]:
# Define groups
feature_groups = define_iiot_feature_groups(train_df)

Feature Groups Defined:
      log_stats                :  6 features
      packet_rate              :  5 features
      size_length              : 20 features
      tcp_flags                : 10 features
      ip_flags                 :  4 features
      address_diversity        :  6 features
      network_multiplexing     :  6 features
      timing_control           : 12 features
      fragmentation            :  2 features

Total features: 71


**5.2 Sensitivity Analysis per Group**

In [None]:
class SimpleDiscriminator(nn.Module):
    """
    Lightweight Discriminator for sensitivity analysis
    Memory-efficient architecture
    """
    def __init__(self, input_dim):
        super(SimpleDiscriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

In [None]:
def train_discriminator_for_class(X_class, X_benign, epochs=20, batch_size=512):
    """
    Train a simple discriminator to distinguish attack class from benign
    Used for sensitivity analysis
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Prepare data
    X_attack = torch.FloatTensor(X_class.values).to(device)
    X_normal = torch.FloatTensor(X_benign.values).to(device)

    # Labels: 1 for attack, 0 for benign
    y_attack = torch.ones(len(X_attack), 1).to(device)
    y_normal = torch.zeros(len(X_normal), 1).to(device)

    # Combine
    X_combined = torch.cat([X_attack, X_normal], dim=0)
    y_combined = torch.cat([y_attack, y_normal], dim=0)

    # Shuffle
    indices = torch.randperm(len(X_combined))
    X_combined = X_combined[indices]
    y_combined = y_combined[indices]

    # Create DataLoader
    dataset = TensorDataset(X_combined, y_combined)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # Initialize discriminator
    discriminator = SimpleDiscriminator(X_class.shape[1]).to(device)
    optimizer = optim.Adam(discriminator.parameters(), lr=0.001)
    criterion = nn.BCELoss()

    # Training loop
    discriminator.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            outputs = discriminator(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        if (epoch + 1) % 10 == 0:
            print(f"         Epoch {epoch+1}/{epochs}, Loss: {epoch_loss/len(dataloader):.4f}")

    return discriminator

In [None]:
def sensitivity_analysis_per_group(discriminator, X_sample, feature_groups,
                                   epsilon=1e-5, top_k=5):
    """
    Perform sensitivity analysis PER feature group
    Returns top-K features from each group
    """
    device = next(discriminator.parameters()).device
    discriminator.eval()

    selected_features = {}
    all_importance_scores = {}

    with torch.no_grad():
        for group_name, group_features in feature_groups.items():
            # Filter valid features
            valid_features = [f for f in group_features if f in X_sample.columns]

            if len(valid_features) == 0:
                continue

            print(f"\n      Analyzing {group_name} ({len(valid_features)} features)...")

            importance_scores = {}

            # Get baseline output
            X_tensor = torch.FloatTensor(X_sample.values).to(device)
            baseline_output = discriminator(X_tensor).cpu().numpy()

            # Perturb each feature
            for feature in valid_features:
                feature_idx = X_sample.columns.get_loc(feature)

                # Create perturbed copy
                X_perturbed = X_sample.copy()
                X_perturbed.iloc[:, feature_idx] += epsilon

                # Get perturbed output
                X_pert_tensor = torch.FloatTensor(X_perturbed.values).to(device)
                perturbed_output = discriminator(X_pert_tensor).cpu().numpy()

                # Calculate importance
                importance = np.abs(perturbed_output - baseline_output).mean()
                importance_scores[feature] = importance

            # Sort and select top-K
            sorted_features = sorted(importance_scores.items(),
                                   key=lambda x: x[1], reverse=True)

            top_features = [f[0] for f in sorted_features[:top_k]]
            selected_features[group_name] = top_features
            all_importance_scores[group_name] = importance_scores

            print(f"         Top-{top_k} features:")
            for i, (feat, score) in enumerate(sorted_features[:top_k], 1):
                print(f"            {i}. {feat:40s}: {score:.6f}")

    return selected_features, all_importance_scores

In [None]:
# Perform group-based feature selection for each class category
print("Starting sensitivity analysis...")

Starting sensitivity analysis...


In [None]:
# Sample data for efficiency (10k samples per class)
sample_size = 10000
X_benign_sample = train_df[train_df['label2'] == 'benign'][feature_cols].sample(
    n=min(sample_size, len(train_df[train_df['label2'] == 'benign'])),
    random_state=42
)

selected_features_by_class = {}
importance_scores_by_class = {}

In [None]:
# Process Limited and Sparse classes only (they need augmentation)
target_classes = limited_classes + sparse_classes

for attack_class in target_classes:
    print(f"Processing class: {attack_class}")
    print(f"   Category: {categories[attack_class]}")

    # Get attack samples
    X_attack = train_df[train_df['label2'] == attack_class][feature_cols]

    if len(X_attack) < 100:
        print(f"      WARNING: Only {len(X_attack)} samples, using all")
        X_attack_sample = X_attack
    else:
        X_attack_sample = X_attack.sample(
            n=min(sample_size, len(X_attack)),
            random_state=42
        )

    print(f"      Training discriminator...")
    discriminator = train_discriminator_for_class(
        X_attack_sample, X_benign_sample,
        epochs=20, batch_size=256
    )

    print(f"\n      Running sensitivity analysis...")
    selected_feats, importance_scores = sensitivity_analysis_per_group(
        discriminator, X_attack_sample, feature_groups,
        epsilon=1e-5, top_k=5
    )

    selected_features_by_class[attack_class] = selected_feats
    importance_scores_by_class[attack_class] = importance_scores

    # Clear GPU memory
    del discriminator
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

**5.3 Select Top-K per Group**

In [None]:
def consolidate_selected_features(selected_features_by_class):
    """
    Consolidate selected features across all classes
    """
    all_selected = set()

    for attack_class, group_features in selected_features_by_class.items():
        for group_name, features in group_features.items():
            all_selected.update(features)

    return list(all_selected)

# Get all selected features
selected_important_features = consolidate_selected_features(selected_features_by_class)

print(f"Total selected features: {len(selected_important_features)}")
print(f"   Selected from original: {len(feature_cols)} features")
print(f"   Reduction: {(1 - len(selected_important_features)/len(feature_cols))*100:.1f}%")

Total selected features: 0
   Selected from original: 71 features
   Reduction: 100.0%


**5.4 Validate Selected Features**

In [None]:
def validate_selected_features(selected_features, feature_groups):
    """
    Validate feature selection across groups
    """
    print("Selected Features per Group:")

    group_distribution = {}
    for group_name, group_feats in feature_groups.items():
        selected_in_group = [f for f in selected_features if f in group_feats]
        group_distribution[group_name] = len(selected_in_group)

        if len(selected_in_group) > 0:
            pct = (len(selected_in_group) / len(selected_features)) * 100
            print(f"      {group_name:25s}: {len(selected_in_group):2d} ({pct:5.1f}%)")

    return group_distribution

# Validate
group_distribution = validate_selected_features(
    selected_important_features, feature_groups
)

print("\n Tahapan 5 Complete!")
gc.collect()

Selected Features per Group:

 Tahapan 5 Complete!


16564

##Tahapan 6: Data Augmentation (FIGS)

**6.1 FIGAN for Limited Classes**

In [None]:
class Generator(nn.Module):
    """Lightweight Generator for FIGAN"""
    def __init__(self, noise_dim, output_dim):
        super(Generator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(noise_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Linear(256, output_dim),
            nn.Tanh()
        )

    def forward(self, z):
        return self.model(z)

class Discriminator(nn.Module):
    """Discriminator for FIGAN"""
    def __init__(self, input_dim):
        super(Discriminator, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.model(x)

In [None]:
def train_figan(X_real, selected_features, noise_dim=100, epochs=50,
                batch_size=256, save_every=10):
    """
    Train FIGAN for generating synthetic samples
    Only generates data for selected important features
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"      Using device: {device}")

    # Filter to selected features only
    X_selected = X_real[selected_features].values
    n_features = len(selected_features)

    # Initialize models
    generator = Generator(noise_dim, n_features).to(device)
    discriminator = Discriminator(n_features).to(device)

    # Optimizers
    g_optimizer = optim.Adam(generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
    d_optimizer = optim.Adam(discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))

    criterion = nn.BCELoss()

    # Convert to tensor
    X_tensor = torch.FloatTensor(X_selected).to(device)

    # Training loop
    g_losses = []
    d_losses = []

    for epoch in range(epochs):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        discriminator.train()

        # Real samples
        real_samples = X_tensor[torch.randint(0, len(X_tensor), (batch_size,))]
        real_labels = torch.ones(batch_size, 1).to(device)

        # Fake samples
        noise = torch.randn(batch_size, noise_dim).to(device)
        fake_samples = generator(noise)
        fake_labels = torch.zeros(batch_size, 1).to(device)

        # Discriminator loss
        d_optimizer.zero_grad()

        real_loss = criterion(discriminator(real_samples), real_labels)
        fake_loss = criterion(discriminator(fake_samples.detach()), fake_labels)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        d_optimizer.step()

        # -----------------
        #  Train Generator
        # -----------------
        g_optimizer.zero_grad()

        noise = torch.randn(batch_size, noise_dim).to(device)
        fake_samples = generator(noise)
        g_loss = criterion(discriminator(fake_samples), real_labels)

        g_loss.backward()
        g_optimizer.step()

        # Record losses
        g_losses.append(g_loss.item())
        d_losses.append(d_loss.item())

        if (epoch + 1) % save_every == 0:
            print(f"         Epoch {epoch+1}/{epochs} | D Loss: {d_loss.item():.4f} | G Loss: {g_loss.item():.4f}")

    return generator, discriminator, g_losses, d_losses

def generate_synthetic_samples(generator, n_samples, selected_features,
                               full_feature_list, noise_dim=100):
    """
    Generate synthetic samples and pad non-selected features with zeros
    """
    device = next(generator.parameters()).device
    generator.eval()

    with torch.no_grad():
        noise = torch.randn(n_samples, noise_dim).to(device)
        synthetic_selected = generator(noise).cpu().numpy()

    # Create full feature dataframe with zeros
    synthetic_df = pd.DataFrame(
        np.zeros((n_samples, len(full_feature_list))),
        columns=full_feature_list
    )

    # Fill selected features with generated data
    synthetic_df[selected_features] = synthetic_selected

    return synthetic_df

In [None]:
# Train FIGAN for each Limited class
print("Training FIGAN for Limited classes...")

synthetic_data_limited = {}

for attack_class in limited_classes:
    print(f"\n   Class: {attack_class}")

    # Get real samples
    X_real = train_df[train_df['label2'] == attack_class][feature_cols]
    n_real = len(X_real)

    # Get selected features for this class
    if attack_class in selected_features_by_class:
        class_selected_features = consolidate_selected_features(
            {attack_class: selected_features_by_class[attack_class]}
        )
    else:
        # Fallback to all selected features
        class_selected_features = selected_important_features

    print(f"      Using {len(class_selected_features)} selected features")
    print(f"      Real samples: {n_real}")

    # Calculate target samples (balance to majority class)
    n_majority = (train_df['label2'] == 'benign').sum()
    n_target = int(n_majority * 0.5)  # Target 50% of majority
    n_generate = max(0, n_target - n_real)

    if n_generate == 0:
        print(f"      No generation needed (already sufficient)")
        continue

    print(f"      Target samples: {n_target}")
    print(f"      Will generate: {n_generate} samples")

    # Train FIGAN
    print(f"      Training FIGAN...")
    generator, discriminator, g_losses, d_losses = train_figan(
        X_real, class_selected_features,
        noise_dim=100, epochs=50, batch_size=256
    )

    # Generate synthetic samples
    print(f"      Generating synthetic samples...")
    synthetic_df = generate_synthetic_samples(
        generator, n_generate, class_selected_features,
        feature_cols, noise_dim=100
    )

    synthetic_data_limited[attack_class] = synthetic_df

    print(f"      ✓ Generated {len(synthetic_df)} samples")

    # Clear memory
    del generator, discriminator
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

Training FIGAN for Limited classes...


**6.2 FISMOTE for Sparse Classes**


In [None]:
def fismote_generate(X_real, selected_features, full_feature_list,
                     n_samples, k_neighbors=5):
    """
    Feature-Importance SMOTE
    Generate synthetic samples using SMOTE on selected features only
    """
    print(f" Using {len(selected_features)} selected features")

    # Extract selected features
    X_selected = X_real[selected_features].values

    if len(X_real) < k_neighbors:
        k_neighbors = max(1, len(X_real) - 1)
        print(f"      Adjusted k_neighbors to {k_neighbors}")

    # Fit NearestNeighbors
    nbrs = NearestNeighbors(n_neighbors=k_neighbors, algorithm='auto').fit(X_selected)

    # Generate synthetic samples
    synthetic_selected = []

    for _ in range(n_samples):
        # Randomly select a sample
        idx = np.random.randint(0, len(X_selected))
        sample = X_selected[idx]

        # Find k nearest neighbors
        distances, indices = nbrs.kneighbors([sample])

        # Randomly select a neighbor
        neighbor_idx = np.random.choice(indices[0])
        neighbor = X_selected[neighbor_idx]

        # Interpolate
        alpha = np.random.random()
        synthetic_sample = sample + alpha * (neighbor - sample)
        synthetic_selected.append(synthetic_sample)

    synthetic_selected = np.array(synthetic_selected)

    # Create full feature dataframe with zeros
    synthetic_df = pd.DataFrame(
        np.zeros((n_samples, len(full_feature_list))),
        columns=full_feature_list
    )

    # Fill selected features
    synthetic_df[selected_features] = synthetic_selected

    return synthetic_df

In [None]:
# Generate synthetic data for Sparse classes
print("Generating synthetic data for Sparse classes...")

synthetic_data_sparse = {}

for attack_class in sparse_classes:
    print(f"\n   Class: {attack_class}")

    # Get real samples
    X_real = train_df[train_df['label2'] == attack_class][feature_cols]
    n_real = len(X_real)

    # Get selected features
    if attack_class in selected_features_by_class:
        class_selected_features = consolidate_selected_features(
            {attack_class: selected_features_by_class[attack_class]}
        )
    else:
        class_selected_features = selected_important_features

    print(f"   Real samples: {n_real}")
    print(f"   Using {len(class_selected_features)} selected features")

    # Calculate target samples
    n_majority = (train_df['label2'] == 'benign').sum()
    n_target = int(n_majority * 0.2)  # Target 20% of majority for sparse
    n_generate = max(0, n_target - n_real)

    if n_generate == 0:
        print(f"  No generation needed")
        continue

    print(f"  Target samples: {n_target}")
    print(f"  Will generate: {n_generate} samples")

    # Generate using FISMOTE
    print(f"  Generating with FISMOTE...")
    synthetic_df = fismote_generate(
        X_real, class_selected_features, feature_cols,
        n_generate, k_neighbors=min(5, n_real-1)
    )

    synthetic_data_sparse[attack_class] = synthetic_df

    print(f" Generated {len(synthetic_df)} samples")

Generating synthetic data for Sparse classes...


**6.3 Merge Augmented Data**

In [None]:
def merge_augmented_data(train_df, synthetic_data_limited, synthetic_data_sparse,
                        plentiful_classes, limited_classes, sparse_classes):
    """
    Merge original and synthetic data
    """
    augmented_dfs = []

    # Add Plentiful classes (no augmentation)
    for attack_class in plentiful_classes:
        class_df = train_df[train_df['label2'] == attack_class].copy()
        augmented_dfs.append(class_df)
        print(f"      {attack_class:12s} (Plentiful): {len(class_df):7,} samples (original)")

    # Add Limited classes (with FIGAN synthetic)
    for attack_class in limited_classes:
        class_df = train_df[train_df['label2'] == attack_class].copy()
        original_count = len(class_df)

        if attack_class in synthetic_data_limited:
            synthetic_df = synthetic_data_limited[attack_class].copy()
            synthetic_df['label2'] = attack_class
            synthetic_df['time_window'] = class_df['time_window'].mode()[0]

            augmented_dfs.append(class_df)
            augmented_dfs.append(synthetic_df)

            total_count = original_count + len(synthetic_df)
            print(f"      {attack_class:12s} (Limited):   {original_count:7,} + {len(synthetic_df):7,} = {total_count:7,}")
        else:
            augmented_dfs.append(class_df)
            print(f"      {attack_class:12s} (Limited):   {original_count:7,} (no augmentation)")

    # Add Sparse classes (with FISMOTE synthetic)
    for attack_class in sparse_classes:
        class_df = train_df[train_df['label2'] == attack_class].copy()
        original_count = len(class_df)

        if attack_class in synthetic_data_sparse:
            synthetic_df = synthetic_data_sparse[attack_class].copy()
            synthetic_df['label2'] = attack_class
            synthetic_df['time_window'] = class_df['time_window'].mode()[0]

            augmented_dfs.append(class_df)
            augmented_dfs.append(synthetic_df)

            total_count = original_count + len(synthetic_df)
            print(f"      {attack_class:12s} (Sparse):    {original_count:7,} + {len(synthetic_df):7,} = {total_count:7,}")
        else:
            augmented_dfs.append(class_df)
            print(f"      {attack_class:12s} (Sparse):    {original_count:7,} (no augmentation)")

    # Concatenate all
    train_augmented = pd.concat(augmented_dfs, ignore_index=True)

    return train_augmented

In [None]:
# Merge data
print("Merging augmented data...")
train_augmented = merge_augmented_data(
    train_df, synthetic_data_limited, synthetic_data_sparse,
    plentiful_classes, limited_classes, sparse_classes
)

print(f"\n Augmentation Summary:")
print(f"      Original train size: {len(train_df):,}")
print(f"      Augmented train size: {len(train_augmented):,}")
print(f"      Increase: {len(train_augmented) - len(train_df):,} samples ({((len(train_augmented)/len(train_df))-1)*100:.1f}%)")

Merging augmented data...
      benign       (Plentiful): 250,160 samples (original)
      recon        (Plentiful):  84,591 samples (original)
      dos          (Plentiful):  49,841 samples (original)
      ddos         (Plentiful):  47,529 samples (original)
      mitm         (Plentiful):  21,493 samples (original)
      malware      (Plentiful):  20,747 samples (original)
      web          (Plentiful):   7,146 samples (original)
      bruteforce   (Plentiful):   4,619 samples (original)

 Augmentation Summary:
      Original train size: 486,126
      Augmented train size: 486,126
      Increase: 0 samples (0.0%)


**6.4 Validate Synthetic Data Quality**

In [None]:
def validate_synthetic_quality(train_original, train_augmented):
    """
    Validate quality of synthetic data
    """
    print("  Statistical Validation:")

    for attack_class in limited_classes + sparse_classes:
        real_data = train_original[train_original['label2'] == attack_class][feature_cols]
        aug_data = train_augmented[train_augmented['label2'] == attack_class][feature_cols]

        if len(real_data) == len(aug_data):
            continue  # No synthetic data generated

        synthetic_data = aug_data.iloc[len(real_data):]  # Only synthetic

        print(f"\n      {attack_class}:")
        print(f"         Real samples: {len(real_data)}")
        print(f"         Synthetic samples: {len(synthetic_data)}")

        # Compare distributions (first 5 features)
        for feat in feature_cols[:5]:
            real_mean = real_data[feat].mean()
            synth_mean = synthetic_data[feat].mean()
            diff_pct = abs((synth_mean - real_mean) / (real_mean + 1e-10)) * 100

            print(f"         {feat:40s}: Real={real_mean:7.3f}, Synth={synth_mean:7.3f}, Diff={diff_pct:5.1f}%")

# Validate
validate_synthetic_quality(train_df, train_augmented)

print("\n Tahapan 6 Complete!")
gc.collect()

  Statistical Validation:

 Tahapan 6 Complete!


0

##Tahapan 7: Model Training



In [None]:
# Prepare data
print("Preparing training data...")

# Encode labels
le = LabelEncoder()
y_train_augmented = le.fit_transform(train_augmented['label2'])
y_test = le.transform(test_df['label2'])

X_train_augmented = train_augmented[feature_cols].values
X_test = test_df[feature_cols].values

print(f"  X_train shape: {X_train_augmented.shape}")
print(f"  X_test shape:  {X_test.shape}")
print(f"  Classes: {le.classes_}")

Preparing training data...
  X_train shape: (486126, 71)
  X_test shape:  (60516, 71)
  Classes: ['benign' 'bruteforce' 'ddos' 'dos' 'malware' 'mitm' 'recon' 'web']


**7.1 XGBoost Training**

In [None]:
def train_xgboost(X_train, y_train, X_test, y_test, n_classes):
    """
    Train XGBoost classifier with optimized parameters
    """
    print("Initializing XGBoost...")

    xgb_params = {
        'objective': 'multi:softmax',
        'num_class': n_classes,
        'max_depth': 6,
        'learning_rate': 0.1,
        'n_estimators': 100,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'random_state': 42,
        'tree_method': 'hist',  # Faster for large datasets
        'n_jobs': -1
    }

    model = xgb.XGBClassifier(**xgb_params)

    print("      Training XGBoost...")
    model.fit(X_train, y_train,
             eval_set=[(X_test, y_test)],
             verbose=False)

    # Predictions
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    train_acc = accuracy_score(y_train, y_pred_train)
    test_acc = accuracy_score(y_test, y_pred_test)

    print(f"      Train Accuracy: {train_acc:.4f}")
    print(f"      Test Accuracy:  {test_acc:.4f}")

    return model, y_pred_test

In [None]:
# Train XGBoost
xgb_model, xgb_pred = train_xgboost(
    X_train_augmented, y_train_augmented,
    X_test, y_test, len(le.classes_)
)

Initializing XGBoost...
      Training XGBoost...
      Train Accuracy: 0.9743
      Test Accuracy:  0.9834


In [None]:
# Menyimpan Hasil Model
joblib.dump(xgb_model, "/content/drive/MyDrive/Dataset/CIC_IIoT_2025/Brave/xgb_model.pkl")

['/content/drive/MyDrive/Dataset/CIC_IIoT_2025/Brave/xgb_model.pkl']

In [None]:
# Menload Kembali Hasil Model
xgb_model = joblib.load("/content/drive/MyDrive/Dataset/CIC_IIoT_2025/Brave/xgb_model.pkl")

print("Model berhasil di load")

**7.2 LightGBM Training**


In [None]:
def train_lightgbm(X_train, y_train, X_test, y_test, n_classes):
    """
    Train LightGBM classifier with optimized parameters
    """
    print("Initializing LightGBM...")

    lgb_params = {
        'objective': 'multiclass',
        'num_class': n_classes,
        'max_depth': 6,
        'learning_rate': 0.1,
        'n_estimators': 100,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'random_state': 42,
        'n_jobs': -1,
        'verbose': -1
    }

    model = lgb.LGBMClassifier(**lgb_params)

    print("      Training LightGBM...")
    model.fit(X_train, y_train,
             eval_set=[(X_test, y_test)],
             eval_metric='multi_logloss',
             callbacks=[lgb.early_stopping(stopping_rounds=10, verbose=False)])

    # Predictions
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    train_acc = accuracy_score(y_train, y_pred_train)
    test_acc = accuracy_score(y_test, y_pred_test)

    print(f"      Train Accuracy: {train_acc:.4f}")
    print(f"      Test Accuracy:  {test_acc:.4f}")

    return model, y_pred_test

In [None]:
# Train LightGBM
lgb_model, lgb_pred = train_lightgbm(
    X_train_augmented, y_train_augmented,
    X_test, y_test, len(le.classes_)
)

Initializing LightGBM...
      Training LightGBM...
      Train Accuracy: 0.9788
      Test Accuracy:  0.9874


In [None]:
# Menyimpan Hasil Model
joblib.dump(lgb_model, "/content/drive/MyDrive/Dataset/CIC_IIoT_2025/Brave/lgb_model.pkl")

print("Model berhasil disimpan")

Model berhasil disimpan


In [None]:
# Menload Kembali Hasil Model
lgb_model = joblib.load("/content/drive/MyDrive/Dataset/CIC_IIoT_2025/Brave/lgb_model.pkl")

print("Model berhasil diload Kembali")

**7.3 Random Forest Training**

In [None]:
def train_random_forest(X_train, y_train, X_test, y_test):
    """
    Train Random Forest classifier
    """
    print("Initializing Random Forest...")

    rf_params = {
        'n_estimators': 100,
        'max_depth': 20,
        'min_samples_split': 5,
        'min_samples_leaf': 2,
        'random_state': 42,
        'n_jobs': -1,
        'verbose': 0
    }

    model = RandomForestClassifier(**rf_params)

    print("      Training Random Forest...")
    model.fit(X_train, y_train)

    # Predictions
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    train_acc = accuracy_score(y_train, y_pred_train)
    test_acc = accuracy_score(y_test, y_pred_test)

    print(f"      Train Accuracy: {train_acc:.4f}")
    print(f"      Test Accuracy:  {test_acc:.4f}")

    return model, y_pred_test

In [None]:
# Train Random Forest
rf_model, rf_pred = train_random_forest(
    X_train_augmented, y_train_augmented,
    X_test, y_test
)

Initializing Random Forest...
      Training Random Forest...
      Train Accuracy: 0.9796
      Test Accuracy:  0.9851


In [None]:
# Menyimpan Hasil Model
joblib.dump(rf_model, "/content/drive/MyDrive/Dataset/CIC_IIoT_2025/Brave/rf_model.pkl")

print("Model berhasil disimpan")

Model berhasil disimpan


In [None]:
# Menload Kembali Hasil Model
rf_model = joblib.load("/content/drive/MyDrive/Dataset/CIC_IIoT_2025/Brave/rf_model.pkl")

print("Model berhasil diload Kembali")

In [None]:
print("Tahapan 7 Complete!")
gc.collect()

Tahapan 7 Complete!


89

In [None]:
import psutil

# Memori Tersedia
memory_available = psutil.virtual_memory().available

# Konversi ke GB
memory_available_gb = memory_available / (1024 ** 3)

print(f"Jumlah memori yang tersedia: {memory_available_gb:.2f} GB")

Jumlah memori yang tersedia: 7.30 GB


##Tahapan 8: Evaluation

**8.1 Predictions on Test Set**

In [None]:
predictions_dict = {
    'XGBoost': xgb_pred,
    'LightGBM': lgb_pred,
    'Random Forest': rf_pred
}

print("Predictions collected for all models")

Predictions collected for all models


**8.2 Metrics Calculation (per class)**

In [None]:
def calculate_metrics_per_class(y_true, y_pred, label_encoder):
    """
    Calculate comprehensive metrics for each class
    """
    # Get class names
    classes = label_encoder.classes_

    # Calculate metrics
    precision, recall, f1, support = precision_recall_fscore_support(
        y_true, y_pred, labels=range(len(classes)), zero_division=0
    )

    # Overall metrics
    accuracy = accuracy_score(y_true, y_pred)
    mcc = matthews_corrcoef(y_true, y_pred)

    # Macro/Weighted averages
    macro_precision = precision.mean()
    macro_recall = recall.mean()
    macro_f1 = f1.mean()

    weighted_precision = (precision * support).sum() / support.sum()
    weighted_recall = (recall * support).sum() / support.sum()
    weighted_f1 = (f1 * support).sum() / support.sum()

    # Create results DataFrame
    results_df = pd.DataFrame({
        'Class': classes,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Support': support
    })

    # Add overall metrics
    overall_metrics = {
        'Accuracy': accuracy,
        'MCC': mcc,
        'Macro Precision': macro_precision,
        'Macro Recall': macro_recall,
        'Macro F1': macro_f1,
        'Weighted Precision': weighted_precision,
        'Weighted Recall': weighted_recall,
        'Weighted F1': weighted_f1
    }

    return results_df, overall_metrics

In [None]:
# Calculate metrics for all models
all_results = {}

for model_name, y_pred in predictions_dict.items():
    print(f"\n{model_name}:")

    results_df, overall_metrics = calculate_metrics_per_class(
        y_test, y_pred, le
    )

    all_results[model_name] = {
        'per_class': results_df,
        'overall': overall_metrics
    }

    # Print per-class results
    print("\n      Per-Class Metrics:")
    print(results_df.to_string(index=False))

    # Print overall metrics
    print("\n      Overall Metrics:")
    for metric, value in overall_metrics.items():
        print(f"         {metric:20s}: {value:.4f}")


XGBoost:

      Per-Class Metrics:
     Class  Precision   Recall  F1-Score  Support
    benign   0.973596 0.999032  0.986150    27903
bruteforce   0.998557 0.976023  0.987161      709
      ddos   0.997620 0.978065  0.987745     6428
       dos   0.986732 0.991267  0.988994     6527
   malware   0.991975 0.982377  0.987153     2894
      mitm   0.989875 0.992555  0.991213     2955
     recon   0.991815 0.946485  0.968620    12034
       web   1.000000 0.955910  0.977458     1066

      Overall Metrics:
         Accuracy            : 0.9834
         MCC                 : 0.9770
         Macro Precision     : 0.9913
         Macro Recall        : 0.9777
         Macro F1            : 0.9843
         Weighted Precision  : 0.9836
         Weighted Recall     : 0.9834
         Weighted F1         : 0.9833

LightGBM:

      Per-Class Metrics:
     Class  Precision   Recall  F1-Score  Support
    benign   0.978924 0.998781  0.988753    27903
bruteforce   0.992938 0.991537  0.992237      709

**8.3 Confusion Matrix**

In [None]:
def plot_confusion_matrix(y_true, y_pred, label_encoder, model_name,
                         save_path=None):
    """
    Plot confusion matrix heatmap
    """
    cm = confusion_matrix(y_true, y_pred)
    classes = label_encoder.classes_

    plt.figure(figsize=(12, 10))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=classes, yticklabels=classes,
                cbar_kws={'label': 'Count'})

    plt.title(f'Confusion Matrix - {model_name}', fontsize=14, fontweight='bold')
    plt.ylabel('True Label', fontsize=12)
    plt.xlabel('Predicted Label', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.yticks(rotation=0)

    # Add accuracy in title
    accuracy = accuracy_score(y_true, y_pred)
    plt.suptitle(f'Accuracy: {accuracy:.4f}', y=0.98, fontsize=10)

    plt.tight_layout()

    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f"      Saved: {save_path}")

    plt.close()

In [None]:
# Plot confusion matrices for all models
for model_name, y_pred in predictions_dict.items():
    print(f"\n   Generating confusion matrix for {model_name}...")
    save_path = f'confusion_matrix_{model_name.lower().replace(" ", "_")}.png'
    plot_confusion_matrix(y_test, y_pred, le, model_name, save_path)


   Generating confusion matrix for XGBoost...
      Saved: confusion_matrix_xgboost.png

   Generating confusion matrix for LightGBM...
      Saved: confusion_matrix_lightgbm.png

   Generating confusion matrix for Random Forest...
      Saved: confusion_matrix_random_forest.png


**8.4 Comparison with Baseline**

In [None]:
def train_baseline_models(X_train_original, y_train_original, X_test, y_test):
    """
    Train baseline models WITHOUT augmentation for comparison
    """
    print("      Training baseline XGBoost (no augmentation)...")

    xgb_baseline = xgb.XGBClassifier(
        objective='multi:softmax',
        num_class=len(np.unique(y_train_original)),
        max_depth=6,
        learning_rate=0.1,
        n_estimators=100,
        random_state=42,
        tree_method='hist',
        n_jobs=-1
    )

    xgb_baseline.fit(X_train_original, y_train_original, verbose=False)
    baseline_pred = xgb_baseline.predict(X_test)

    return baseline_pred

In [None]:
# Prepare baseline data (original train without augmentation)
X_train_original = train_df[feature_cols].values
y_train_original = le.transform(train_df['label2'])

In [None]:
# Train baseline
baseline_pred = train_baseline_models(
    X_train_original, y_train_original, X_test, y_test
)

      Training baseline XGBoost (no augmentation)...


In [None]:
# Calculate baseline metrics
print("Baseline Model (No Augmentation):")
baseline_results, baseline_overall = calculate_metrics_per_class(
    y_test, baseline_pred, le
)

print("\n      Per-Class Metrics:")
print(baseline_results.to_string(index=False))

print("\n      Overall Metrics:")
for metric, value in baseline_overall.items():
    print(f"         {metric:20s}: {value:.4f}")

Baseline Model (No Augmentation):

      Per-Class Metrics:
     Class  Precision   Recall  F1-Score  Support
    benign   0.994891 0.997993  0.996440    27903
bruteforce   1.000000 1.000000  1.000000      709
      ddos   0.999219 0.995644  0.997429     6428
       dos   0.997247 0.998928  0.998086     6527
   malware   0.997231 0.995508  0.996369     2894
      mitm   0.998646 0.998646  0.998646     2955
     recon   0.994986 0.989363  0.992167    12034
       web   1.000000 0.998124  0.999061     1066

      Overall Metrics:
         Accuracy            : 0.9961
         MCC                 : 0.9945
         Macro Precision     : 0.9978
         Macro Recall        : 0.9968
         Macro F1            : 0.9973
         Weighted Precision  : 0.9961
         Weighted Recall     : 0.9961
         Weighted F1         : 0.9961


In [None]:
# Comparison table
print("Performance Comparison (A-FIGS vs Baseline):")
print("\n      Model Comparison (XGBoost):")

comparison_df = pd.DataFrame({
    'Metric': list(baseline_overall.keys()),
    'Baseline': list(baseline_overall.values()),
    'A-FIGS (XGBoost)': list(all_results['XGBoost']['overall'].values())
})

comparison_df['Improvement'] = (
    (comparison_df['A-FIGS (XGBoost)'] - comparison_df['Baseline']) /
    comparison_df['Baseline'] * 100
)

print(comparison_df.to_string(index=False))

Performance Comparison (A-FIGS vs Baseline):

      Model Comparison (XGBoost):
            Metric  Baseline  A-FIGS (XGBoost)  Improvement
          Accuracy  0.996067          0.983376    -1.274097
               MCC  0.994536          0.976963    -1.766908
   Macro Precision  0.997778          0.991271    -0.652088
      Macro Recall  0.996776          0.977714    -1.912320
          Macro F1  0.997275          0.984312    -1.299823
Weighted Precision  0.996069          0.983619    -1.249913
   Weighted Recall  0.996067          0.983376    -1.274097
       Weighted F1  0.996065          0.983294    -1.282104


In [None]:
# Per-class improvement
print("Per-Class F1-Score Improvement:")
baseline_f1 = baseline_results.set_index('Class')['F1-Score']
afigs_f1 = all_results['XGBoost']['per_class'].set_index('Class')['F1-Score']

improvement_df = pd.DataFrame({
    'Class': baseline_f1.index,
    'Baseline F1': baseline_f1.values,
    'A-FIGS F1': afigs_f1.values,
    'Improvement': ((afigs_f1.values - baseline_f1.values) /
                    (baseline_f1.values + 1e-10) * 100)
})

print(improvement_df.to_string(index=False))

Per-Class F1-Score Improvement:
     Class  Baseline F1  A-FIGS F1  Improvement
    benign     0.985716   0.985667    -0.004943
bruteforce     0.983583   0.983676     0.009477
      ddos     0.987831   0.987035    -0.080509
       dos     0.989290   0.988068    -0.123540
   malware     0.987320   0.984331    -0.302669
      mitm     0.987295   0.988525     0.124638
     recon     0.966825   0.967596     0.079734
       web     0.976967   0.976967     0.000000


In [None]:
# Highlight Limited and Sparse classes
print("Focus on Augmented Classes:")
augmented_classes = limited_classes + sparse_classes
for attack_class in augmented_classes:
    baseline_f1_val = baseline_f1.get(attack_class, 0)
    afigs_f1_val = afigs_f1.get(attack_class, 0)
    improvement = ((afigs_f1_val - baseline_f1_val) / (baseline_f1_val + 1e-10) * 100)
    category = categories.get(attack_class, 'Unknown')

    print(f"         {attack_class:12s} ({category:8s}): "
          f"Baseline={baseline_f1_val:.4f}, A-FIGS={afigs_f1_val:.4f}, "
          f"Improvement={improvement:+6.2f}%")

Focus on Augmented Classes:
         dos          (Limited ): Baseline=0.9893, A-FIGS=0.9881, Improvement= -0.12%
         mitm         (Limited ): Baseline=0.9873, A-FIGS=0.9885, Improvement= +0.12%
         malware      (Sparse  ): Baseline=0.9873, A-FIGS=0.9843, Improvement= -0.30%
         web          (Sparse  ): Baseline=0.9770, A-FIGS=0.9770, Improvement= +0.00%
         bruteforce   (Sparse  ): Baseline=0.9836, A-FIGS=0.9837, Improvement= +0.01%


**8.5 Visualization & Reporting**

In [None]:
def plot_model_comparison(all_results, save_path='model_comparison.png'):
    """
    Plot comprehensive model comparison
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    models = list(all_results.keys())

    # 1. Overall Accuracy Comparison
    accuracies = [all_results[model]['overall']['Accuracy'] for model in models]
    axes[0, 0].bar(models, accuracies, color=['steelblue', 'coral', 'lightgreen'],
                   edgecolor='black')
    axes[0, 0].set_ylabel('Accuracy')
    axes[0, 0].set_title('Overall Accuracy Comparison')
    axes[0, 0].set_ylim([0.7, 1.0])
    axes[0, 0].grid(axis='y', alpha=0.3)

    for i, v in enumerate(accuracies):
        axes[0, 0].text(i, v + 0.01, f'{v:.4f}', ha='center', fontweight='bold')

    # 2. MCC Comparison
    mccs = [all_results[model]['overall']['MCC'] for model in models]
    axes[0, 1].bar(models, mccs, color=['steelblue', 'coral', 'lightgreen'],
                   edgecolor='black')
    axes[0, 1].set_ylabel('Matthews Correlation Coefficient')
    axes[0, 1].set_title('MCC Comparison')
    axes[0, 1].set_ylim([0.5, 1.0])
    axes[0, 1].grid(axis='y', alpha=0.3)

    for i, v in enumerate(mccs):
        axes[0, 1].text(i, v + 0.01, f'{v:.4f}', ha='center', fontweight='bold')

    # 3. Macro F1-Score Comparison
    macro_f1s = [all_results[model]['overall']['Macro F1'] for model in models]
    axes[1, 0].bar(models, macro_f1s, color=['steelblue', 'coral', 'lightgreen'],
                   edgecolor='black')
    axes[1, 0].set_ylabel('Macro F1-Score')
    axes[1, 0].set_title('Macro F1-Score Comparison')
    axes[1, 0].set_ylim([0.7, 1.0])
    axes[1, 0].grid(axis='y', alpha=0.3)

    for i, v in enumerate(macro_f1s):
        axes[1, 0].text(i, v + 0.01, f'{v:.4f}', ha='center', fontweight='bold')

    # 4. Per-Class F1-Score Comparison (Best model)
    best_model = models[np.argmax(accuracies)]
    per_class_f1 = all_results[best_model]['per_class']

    colors = ['green' if cls in plentiful_classes
              else 'orange' if cls in limited_classes
              else 'red' for cls in per_class_f1['Class']]

    axes[1, 1].barh(per_class_f1['Class'], per_class_f1['F1-Score'],
                    color=colors, edgecolor='black')
    axes[1, 1].set_xlabel('F1-Score')
    axes[1, 1].set_title(f'Per-Class F1-Score ({best_model})')
    axes[1, 1].set_xlim([0, 1.1])
    axes[1, 1].grid(axis='x', alpha=0.3)

    # Add legend
    from matplotlib.patches import Patch
    legend_elements = [
        Patch(facecolor='green', edgecolor='black', label='Plentiful'),
        Patch(facecolor='orange', edgecolor='black', label='Limited'),
        Patch(facecolor='red', edgecolor='black', label='Sparse')
    ]
    axes[1, 1].legend(handles=legend_elements, loc='lower right')

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"      Saved: {save_path}")
    plt.close()

In [None]:
# Generate comparison plot
plot_model_comparison(all_results)

def plot_baseline_vs_afigs(baseline_results, afigs_results, categories,
                           save_path='baseline_vs_afigs.png'):
    """
    Plot detailed baseline vs A-FIGS comparison
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    classes = baseline_results['Class'].values

    # Get category colors
    colors = ['green' if cls in plentiful_classes
              else 'orange' if cls in limited_classes
              else 'red' for cls in classes]

    # 1. Precision Comparison
    x = np.arange(len(classes))
    width = 0.35

    axes[0].bar(x - width/2, baseline_results['Precision'], width,
                label='Baseline', color='lightgray', edgecolor='black')
    axes[0].bar(x + width/2, afigs_results['Precision'], width,
                label='A-FIGS', color=colors, edgecolor='black', alpha=0.8)
    axes[0].set_ylabel('Precision')
    axes[0].set_title('Precision Comparison')
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(classes, rotation=45, ha='right')
    axes[0].legend()
    axes[0].grid(axis='y', alpha=0.3)

    # 2. Recall Comparison
    axes[1].bar(x - width/2, baseline_results['Recall'], width,
                label='Baseline', color='lightgray', edgecolor='black')
    axes[1].bar(x + width/2, afigs_results['Recall'], width,
                label='A-FIGS', color=colors, edgecolor='black', alpha=0.8)
    axes[1].set_ylabel('Recall')
    axes[1].set_title('Recall Comparison')
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(classes, rotation=45, ha='right')
    axes[1].legend()
    axes[1].grid(axis='y', alpha=0.3)

    # 3. F1-Score Comparison
    axes[2].bar(x - width/2, baseline_results['F1-Score'], width,
                label='Baseline', color='lightgray', edgecolor='black')
    axes[2].bar(x + width/2, afigs_results['F1-Score'], width,
                label='A-FIGS', color=colors, edgecolor='black', alpha=0.8)
    axes[2].set_ylabel('F1-Score')
    axes[2].set_title('F1-Score Comparison')
    axes[2].set_xticks(x)
    axes[2].set_xticklabels(classes, rotation=45, ha='right')
    axes[2].legend()
    axes[2].grid(axis='y', alpha=0.3)

    # Add category legend
    from matplotlib.patches import Patch
    legend_elements = [
        Patch(facecolor='green', edgecolor='black', label='Plentiful'),
        Patch(facecolor='orange', edgecolor='black', label='Limited'),
        Patch(facecolor='red', edgecolor='black', label='Sparse')
    ]
    axes[2].legend(handles=legend_elements, loc='lower right',
                   title='Category', framealpha=0.9)

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    print(f"      Saved: {save_path}")
    plt.close()

      Saved: model_comparison.png


In [None]:
# Generate baseline vs A-FIGS comparison
plot_baseline_vs_afigs(
    baseline_results,
    all_results['XGBoost']['per_class'],
    categories
)

# Generate final summary report
print("\n" + "="*80)
print("FINAL SUMMARY REPORT")
print("="*80)

print("\n1. DATASET STATISTICS:")
print(f"      Original Train: {len(train_df):,} samples")
print(f"      Augmented Train: {len(train_augmented):,} samples (+{len(train_augmented)-len(train_df):,})")
print(f"      Test Set: {len(test_df):,} samples")
print(f"      Features: {len(feature_cols)}")
print(f"      Selected Features: {len(selected_important_features)} ({(len(selected_important_features)/len(feature_cols)*100):.1f}%)")

print("\n2. CLASS DISTRIBUTION:")
print(f"      Plentiful: {len(plentiful_classes)} classes - {plentiful_classes}")
print(f"      Limited:   {len(limited_classes)} classes - {limited_classes}")
print(f"      Sparse:    {len(sparse_classes)} classes - {sparse_classes}")

print("\n3. AUGMENTATION SUMMARY:")
total_synthetic = 0
for attack_class in limited_classes:
    if attack_class in synthetic_data_limited:
        n_synth = len(synthetic_data_limited[attack_class])
        total_synthetic += n_synth
        print(f"      {attack_class:12s} (FIGAN):   +{n_synth:,} samples")

for attack_class in sparse_classes:
    if attack_class in synthetic_data_sparse:
        n_synth = len(synthetic_data_sparse[attack_class])
        total_synthetic += n_synth
        print(f"      {attack_class:12s} (FISMOTE): +{n_synth:,} samples")

print(f"\n      Total synthetic samples: {total_synthetic:,}")

print("\n4. MODEL PERFORMANCE (Test Set):")
for model_name in ['XGBoost', 'LightGBM', 'Random Forest']:
    metrics = all_results[model_name]['overall']
    print(f"\n   {model_name}:")
    print(f"      Accuracy:        {metrics['Accuracy']:.4f}")
    print(f"      MCC:             {metrics['MCC']:.4f}")
    print(f"      Macro F1:        {metrics['Macro F1']:.4f}")
    print(f"      Weighted F1:     {metrics['Weighted F1']:.4f}")

print("\n5. A-FIGS IMPROVEMENT (vs Baseline):")
print(f"      Accuracy:    {comparison_df.loc[comparison_df['Metric']=='Accuracy', 'Improvement'].values[0]:+.2f}%")
print(f"      MCC:         {comparison_df.loc[comparison_df['Metric']=='MCC', 'Improvement'].values[0]:+.2f}%")
print(f"      Macro F1:    {comparison_df.loc[comparison_df['Metric']=='Macro F1', 'Improvement'].values[0]:+.2f}%")

print("\n6. CRITICAL ATTACK DETECTION (Sparse & Limited):")
for attack_class in augmented_classes:
    if attack_class in baseline_f1.index and attack_class in afigs_f1.index:
        baseline_val = baseline_f1[attack_class]
        afigs_val = afigs_f1[attack_class]
        improvement = ((afigs_val - baseline_val) / (baseline_val + 1e-10) * 100)
        category = categories[attack_class]
        severity = severity_weights[attack_class]

        print(f"      {attack_class:12s} | Severity={severity:.1f} | "
              f"Baseline F1={baseline_val:.4f} | A-FIGS F1={afigs_val:.4f} | "
              f"Δ={improvement:+6.2f}%")

      Saved: baseline_vs_afigs.png

FINAL SUMMARY REPORT

1. DATASET STATISTICS:
      Original Train: 486,126 samples
      Augmented Train: 782,536 samples (+296,410)
      Test Set: 60,516 samples
      Features: 71
      Selected Features: 60 (84.5%)

2. CLASS DISTRIBUTION:
      Plentiful: 3 classes - ['benign', 'recon', 'ddos']
      Limited:   2 classes - ['dos', 'mitm']
      Sparse:    3 classes - ['malware', 'web', 'bruteforce']

3. AUGMENTATION SUMMARY:
      dos          (FIGAN):   +75,239 samples
      mitm         (FIGAN):   +103,587 samples
      malware      (FISMOTE): +29,285 samples
      web          (FISMOTE): +42,886 samples
      bruteforce   (FISMOTE): +45,413 samples

      Total synthetic samples: 296,410

4. MODEL PERFORMANCE (Test Set):

   XGBoost:
      Accuracy:        0.9825
      MCC:             0.9757
      Macro F1:        0.9827
      Weighted F1:     0.9824

   LightGBM:
      Accuracy:        0.9866
      MCC:             0.9814
      Macro F1:    

In [None]:
# Memori Yang Tersedia
memory_available = psutil.virtual_memory().available / (1024 ** 3)  # Convert to GB
print(f"Memory yang tersedia: {memory_available:.2f} GB")

Memory yang tersedia: 6.03 GB


In [None]:
# ============================================================================
# DATA LEAKAGE CHECK
# ============================================================================
print("\n" + "="*60)
print("DATA LEAKAGE VERIFICATION")
print("="*60)

feature_cols = [col for col in train_df.columns if col not in ['label2', 'time_window']]

# 1. Check feature-level overlap
print("\n[1] Checking feature duplication between train/test...")

test_hash = test_df[feature_cols].apply(lambda x: hash(tuple(x)), axis=1)
train_hash = train_df[feature_cols].apply(lambda x: hash(tuple(x)), axis=1)

overlap = test_hash.isin(train_hash).sum()
print(f"    Test rows with same features in train: {overlap} / {len(test_df)}")

if overlap > 0:
    print(f"    ⚠️ WARNING: {(overlap/len(test_df)*100):.2f}% test data leaked from train!")
else:
    print(f"    ✓ No feature-level leakage detected")

# 2. Check time window separation
print("\n[2] Verifying time window separation...")
train_windows = train_df['time_window'].unique()
test_windows = test_df['time_window'].unique()

print(f"    Train windows: {sorted(train_windows)}")
print(f"    Test windows: {sorted(test_windows)}")

window_overlap = set(train_windows) & set(test_windows)
if len(window_overlap) > 0:
    print(f"    ⚠️ WARNING: Overlapping windows: {window_overlap}")
else:
    print(f"    ✓ No time window overlap")

# 3. Check if normalization/scaling was done correctly
print("\n[3] Feature statistics comparison...")
print("    (Train and test should have different distributions)")

sample_features = feature_cols[:5]
for feat in sample_features:
    train_mean = train_df[feat].mean()
    test_mean = test_df[feat].mean()
    train_std = train_df[feat].std()
    test_std = test_df[feat].std()

    print(f"    {feat[:30]:30s}: Train μ={train_mean:8.2f} σ={train_std:8.2f} | Test μ={test_mean:8.2f} σ={test_std:8.2f}")

print("\n" + "="*60)


DATA LEAKAGE VERIFICATION

[1] Checking feature duplication between train/test...
    Test rows with same features in train: 13770 / 60516

[2] Verifying time window separation...
    Train windows: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8)]
    Test windows: [np.int64(9), np.int64(10)]
    ✓ No time window overlap

[3] Feature statistics comparison...
    (Train and test should have different distributions)
    log_data-ranges_avg           : Train μ=   -0.00 σ=    1.00 | Test μ=   -0.03 σ=    0.97
    log_data-ranges_max           : Train μ=    0.00 σ=    1.00 | Test μ=   -0.02 σ=    0.98
    log_data-ranges_min           : Train μ=   -0.00 σ=    1.00 | Test μ=   -0.04 σ=    0.96
    log_data-ranges_std_deviation : Train μ=   -0.00 σ=    1.00 | Test μ=    0.03 σ=    1.10
    log_data-types_count          : Train μ=   -0.00 σ=    1.00 | Test μ=    0.03 σ=    1.04

