In [None]:
import numpy as np
import torch
import torch.nn.functional as F
import gym
from gym import spaces
from stable_baselines3 import RecurrentPPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor

# Restore the pretrained ConvAE from "convAE_best.pth" and put it in
# inference mode with gradients disabled, then read the reconstruction-error
# statistics used to normalise AE errors.
# NOTE(review): ConvAE and X_unlabeled are not created in this cell before
# this point; they must already exist in the running session.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

ae_model = ConvAE(
    feat_dim=X_unlabeled.shape[2],
    seq_len=X_unlabeled.shape[1],
    latent_dim=128,
)
ae_model.load_state_dict(torch.load("convAE_best.pth", map_location=device))
ae_model.to(device)
ae_model.eval()
# The autoencoder is only used for inference from here on.
for p in ae_model.parameters():
    p.requires_grad = False
print(f"‚úÖ Loaded ConvAE on {device} (frozen for RecurrentPPO).")

# "mean_err" / "std_err" are the two entries read from the stats archive.
stats = np.load("convAE_stats.npz")
ae_mean = stats["mean_err"]
ae_std = stats["std_err"]
print(f"‚úÖ AE normalization loaded: mean={ae_mean:.6f}, std={ae_std:.6f}")


# ============================================================================
# STEP 1: Reconstruct chronological order
# ============================================================================
print("\n" + "=" * 70)
print("üîÑ RECONSTRUCTING CHRONOLOGICAL ORDER")
print("=" * 70)

# Windows and labels of the two subsets, as stored on disk.
X_labeled = np.load("X_labeled.npy")
y_labeled = np.load("y_labeled.npy")
X_unlabeled = np.load("X_unlabeled.npy")
y_unlabeled = np.load("y_unlabeled.npy")

# Target positions of each subset inside the full training array, and the
# length of that array (first element of the saved size file).
labeled_indices = np.load("labeled_indices.npy")
unlabeled_indices = np.load("unlabeled_indices.npy")
train_split_size = np.load("train_split_size.npy")[0]

print(f"üìä Data loaded:")
print(f"   Labeled samples: {len(X_labeled):,}")
print(f"   Unlabeled samples: {len(X_unlabeled):,}")
print(f"   Original training size: {train_split_size:,}")

# Scatter the windows back to their stored positions. Positions named by
# neither index array keep the zero fill.
X_train_reconstructed = np.zeros(
    (train_split_size,) + X_labeled.shape[1:], dtype=X_labeled.dtype
)
X_train_reconstructed[labeled_indices] = X_labeled
X_train_reconstructed[unlabeled_indices] = X_unlabeled

# Same scatter for the labels.
y_train_reconstructed = np.zeros(train_split_size, dtype=y_labeled.dtype)
y_train_reconstructed[labeled_indices] = y_labeled
y_train_reconstructed[unlabeled_indices] = y_unlabeled

# 1 marks a position that came from the labelled subset, 0 everything else.
supervision_mask = np.zeros(train_split_size, dtype=np.int8)
supervision_mask[labeled_indices] = 1

print(f"\n‚úÖ Reconstructed training data:")
print(f"   Shape: {X_train_reconstructed.shape}")
print(f"   Supervised positions: {supervision_mask.sum():,} ({supervision_mask.sum()/train_split_size*100:.2f}%)")
print(f"   Unsupervised positions: {(1-supervision_mask).sum():,} ({(1-supervision_mask).sum()/train_split_size*100:.2f}%)")

# Sanity check against the stored label sequence (labels only; a mismatch is
# reported but does not stop the run).
y_train_original = np.load("y_train_seq.npy")
if not np.array_equal(y_train_reconstructed, y_train_original):
    print("‚ö†Ô∏è  WARNING: Reconstruction mismatch detected!")
else:
    print("‚úÖ VERIFICATION PASSED: Reconstruction matches original!")

print("=" * 70 + "\n")


# ============================================================================
# STEP 2: PPO Environment (Chronological Sequential)
# ============================================================================

class PPOAEEnvChronological(gym.Env):
    """Sequential labelling environment over chronologically ordered windows.

    At every step the agent labels the current window (0 = normal,
    1 = anomaly) and the environment advances to the next index, wrapping
    around at the end of ``X_train``.

    Observation: the precomputed embedding of the current window with its
    normalised auto-encoder reconstruction error appended (float32).

    Reward (sum clipped to [-5, 5]):
      * external -- only where ``supervision_mask`` is 1: +1.0 when the
        action equals the label, -0.5 otherwise; 0.0 on unsupervised windows.
      * intrinsic -- only for action 1: ``lambda_int * err``, minus 0.5 when
        ``err < 0.05``; 0.0 for action 0.

    An episode lasts ``max_steps`` steps. Reaching that limit is reported as
    *truncation* (``terminated`` is always False), because the cut-off is a
    time limit rather than a terminal state of the task.

    Args:
        X_train: windows, indexable by integer position.
        y_train: label per window.
        supervision_mask: 1 where the label may be used for reward, else 0.
        ae_model: auto-encoder called as ``ae_model(x)``; its device is taken
            from its first parameter.
        embeddings: 2-D array, one embedding row per window.
        lambda_int: weight of the intrinsic reward.
        max_steps: episode length in steps.

    Raises:
        ValueError: if ``X_train`` is empty or another input has fewer
            entries than ``X_train``.

    Note:
        ``_ae_error`` reads the module-level ``ae_mean`` and ``ae_std``.
    """

    # (window index, normalised error) of the most recently built observation.
    _cached_err = None

    def __init__(self, X_train, y_train, supervision_mask, ae_model,
                 embeddings, lambda_int=1.0, max_steps=200):
        super().__init__()

        # Fail early instead of hitting a modulo-by-zero or IndexError mid-run.
        n_windows = len(X_train)
        if n_windows == 0:
            raise ValueError("X_train must contain at least one window")
        for name, arr in (("y_train", y_train),
                          ("supervision_mask", supervision_mask),
                          ("embeddings", embeddings)):
            if len(arr) < n_windows:
                raise ValueError(
                    f"{name} has {len(arr)} entries but X_train has {n_windows} windows"
                )

        self.X_train = X_train
        self.y_train = y_train
        self.supervision_mask = supervision_mask  # 1=supervised, 0=unsupervised
        self.ae_model = ae_model
        self.device = next(ae_model.parameters()).device
        self.lambda_int = lambda_int
        self.max_steps = max_steps
        self.steps = 0
        self.idx = 0
        self.embeddings = embeddings
        self._cached_err = None

        emb_dim = self.embeddings.shape[1]
        # Embedding plus one extra slot for the reconstruction error.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(emb_dim + 1,),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)  # 0 = normal, 1 = anomaly

        # Statistics
        n_supervised = np.sum(supervision_mask)
        n_unsupervised = len(supervision_mask) - n_supervised
        print(f"\nüîπ Environment initialized:")
        print(f"   Total windows: {len(X_train):,}")
        print(f"   Supervised: {n_supervised:,} ({n_supervised/len(X_train)*100:.2f}%)")
        print(f"   Unsupervised: {n_unsupervised:,} ({n_unsupervised/len(X_train)*100:.2f}%)")

    def _ae_error(self, x):
        """Return the normalised reconstruction error of one window.

        The mean-squared reconstruction error is standardised with the
        module-level ``ae_mean`` / ``ae_std``, squashed with ``tanh`` and
        clamped below at 0, so the result lies in [0, 1).
        """
        with torch.no_grad():
            x = torch.tensor(x, dtype=torch.float32).unsqueeze(0).to(self.device)
            recon = self.ae_model(x)
            loss = F.mse_loss(recon, x, reduction="mean").item()
        norm = np.tanh((loss - ae_mean) / (ae_std + 1e-8))
        return max(0.0, float(norm))

    def _current_error(self):
        """Return the error of window ``self.idx``, computing it at most once.

        The value computed while building an observation is reused by the
        following ``step`` call, which otherwise would run the auto-encoder a
        second time on the same window. Only the latest index is remembered.
        """
        cached = self._cached_err
        if cached is not None and cached[0] == self.idx:
            return cached[1]
        err = self._ae_error(self.X_train[self.idx])
        self._cached_err = (self.idx, err)
        return err

    def _observation(self):
        """Build the float32 observation for window ``self.idx``."""
        err = np.array([self._current_error()])
        return np.concatenate([self.embeddings[self.idx], err]).astype(np.float32)

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        The episode starts at index 0, or at a uniformly random index with
        probability 0.1. The draw uses the environment's own ``np_random``
        generator, so passing ``seed`` makes the start position reproducible.
        """
        super().reset(seed=seed)
        self.steps = 0

        rng = self.np_random
        if rng.random() < 0.1:  # 10% random start for variety
            self.idx = int(rng.integers(0, len(self.X_train)))
        else:
            self.idx = 0

        return self._observation(), {}

    def step(self, action):
        """Score ``action`` on the current window and advance to the next one.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``obs``
            describes the *next* window, ``terminated`` is always False and
            ``truncated`` becomes True once ``max_steps`` steps were taken.
        """
        self.steps += 1

        true_label = self.y_train[self.idx]
        is_supervised = self.supervision_mask[self.idx]
        err = self._current_error()

        # External reward: only where a ground-truth label may be used.
        if is_supervised == 1:
            external_reward = 1.0 if action == true_label else -0.5
        else:
            external_reward = 0.0

        # Intrinsic reward: pays anomaly predictions in proportion to the
        # reconstruction error and penalises them when the error is tiny.
        if action == 1:
            intrinsic_reward = self.lambda_int * err
            if err < 0.05:
                intrinsic_reward -= 0.5
        else:
            intrinsic_reward = 0.0

        reward = float(np.clip(external_reward + intrinsic_reward, -5, 5))

        # Move on chronologically, wrapping at the end of the data.
        self.idx = (self.idx + 1) % len(self.X_train)
        obs = self._observation()

        # The step budget is a time limit, not a terminal state.
        truncated = self.steps >= self.max_steps

        return obs, reward, False, truncated, {}


# ============================================================================
# STEP 3: Precompute embeddings
# ============================================================================

def compute_embeddings(X, batch_size=1024):
    """Encode all windows in ``X`` with the module-level ``ae_model``.

    The windows are pushed through ``ae_model.encode`` in chunks of
    ``batch_size`` so that the whole dataset never has to sit on ``device``
    in a single forward pass.

    Args:
        X: sequence/array of windows; sliced along its first axis.
        batch_size: number of windows encoded per forward pass (>= 1).

    Returns:
        NumPy array with one embedding row per window, in input order.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    chunks = []
    with torch.no_grad():
        for start in range(0, len(X), batch_size):
            batch = torch.tensor(X[start:start + batch_size], dtype=torch.float32).to(device)
            chunks.append(ae_model.encode(batch).cpu().numpy())

        if not chunks:
            # Empty input: hand it to the encoder unchanged, exactly as a
            # single un-batched call would.
            tensors = torch.tensor(X, dtype=torch.float32).to(device)
            return ae_model.encode(tensors).cpu().numpy()

    return np.concatenate(chunks, axis=0)

print("üîπ Precomputing embeddings for reconstructed data...")
embeddings_train = compute_embeddings(X_train_reconstructed)
print(f"‚úÖ Embeddings computed: {embeddings_train.shape}")


# ============================================================================
# STEP 4: Create VecEnv + Recurrent PPO
# ============================================================================

def make_env(rank):
    """Return a zero-argument factory for one monitored environment.

    The factory builds a ``PPOAEEnvChronological`` from the module-level
    reconstructed data, mask, auto-encoder and embeddings, and wraps it in a
    ``Monitor`` that logs to ``logs/env_<rank>``.
    """
    log_path = f"logs/env_{rank}"

    def _build():
        # Globals are looked up when the factory is called, not when it is made.
        return Monitor(
            PPOAEEnvChronological(
                X_train=X_train_reconstructed,
                y_train=y_train_reconstructed,
                supervision_mask=supervision_mask,
                ae_model=ae_model,
                embeddings=embeddings_train,
                lambda_int=0.8,
                max_steps=200,
            ),
            log_path,
        )

    return _build

# One environment behind a DummyVecEnv, wrapped in VecNormalize with
# observation and reward normalisation enabled (observations clipped at 10).
num_envs = 1
vec_env = DummyVecEnv(list(map(make_env, range(num_envs))))
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True, clip_obs=10.0)

# LSTM settings and the separate policy / value head layer sizes.
policy_kwargs = {
    "lstm_hidden_size": 128,
    "n_lstm_layers": 1,
    "shared_lstm": False,
    "net_arch": {"pi": [256, 256], "vf": [512, 512, 256]},
}

model = RecurrentPPO(
    "MlpLstmPolicy",
    vec_env,
    policy_kwargs=policy_kwargs,
    tensorboard_log="logs_recurrent_ppo_ae_chronological/",
    verbose=1,
    # rollout / optimisation settings
    n_steps=128,
    batch_size=64,
    n_epochs=10,
    learning_rate=1e-4,
    # objective settings
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.02,
)

print("\n" + "=" * 70)
print("üöÄ Starting RecurrentPPO training with chronological data")
print("=" * 70 + "\n")


# ============================================================================
# STEP 5: Train Recurrent PPO
# ============================================================================

# Train, then persist both the agent and the VecNormalize wrapper state.
model.learn(total_timesteps=200_000)
model.save("recurrent_ppo_ae_chronological")
vec_env.save("vec_normalize_recurrent_ppo_chronological.pkl")

print("\n‚úÖ Training complete! Chronological PPO model saved.")
print("   Model: recurrent_ppo_ae_chronological.zip")
print("   VecNormalize: vec_normalize_recurrent_ppo_chronological.pkl")

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
import gym
from gym import spaces
from sb3_contrib import QRDQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor

# Restore the pretrained ConvAE from "convAE_best.pth" and put it in
# inference mode with gradients disabled, then read the reconstruction-error
# statistics used to normalise AE errors.
# NOTE(review): ConvAE and X_unlabeled are not created in this cell before
# this point; they must already exist in the running session.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

ae_model = ConvAE(
    feat_dim=X_unlabeled.shape[2],
    seq_len=X_unlabeled.shape[1],
    latent_dim=128,
)
ae_model.load_state_dict(torch.load("convAE_best.pth", map_location=device))
ae_model.to(device)
ae_model.eval()
# The autoencoder is only used for inference from here on.
for p in ae_model.parameters():
    p.requires_grad = False
print(f"‚úÖ Loaded ConvAE on {device} (frozen for QR-DQN).")

# "mean_err" / "std_err" are the two entries read from the stats archive.
stats = np.load("convAE_stats.npz")
ae_mean = stats["mean_err"]
ae_std = stats["std_err"]
print(f"‚úÖ AE normalization loaded: mean={ae_mean:.6f}, std={ae_std:.6f}")


# ============================================================================
# STEP 1: Reconstruct chronological order
# ============================================================================
print("\n" + "=" * 70)
print("üîÑ RECONSTRUCTING CHRONOLOGICAL ORDER")
print("=" * 70)

# Windows and labels of the two subsets, as stored on disk.
X_labeled = np.load("X_labeled.npy")
y_labeled = np.load("y_labeled.npy")
X_unlabeled = np.load("X_unlabeled.npy")
y_unlabeled = np.load("y_unlabeled.npy")

# Target positions of each subset inside the full training array, and the
# length of that array (first element of the saved size file).
labeled_indices = np.load("labeled_indices.npy")
unlabeled_indices = np.load("unlabeled_indices.npy")
train_split_size = np.load("train_split_size.npy")[0]

print(f"üìä Data loaded:")
print(f"   Labeled samples: {len(X_labeled):,}")
print(f"   Unlabeled samples: {len(X_unlabeled):,}")
print(f"   Original training size: {train_split_size:,}")

# Scatter the windows back to their stored positions. Positions named by
# neither index array keep the zero fill.
X_train_reconstructed = np.zeros(
    (train_split_size,) + X_labeled.shape[1:], dtype=X_labeled.dtype
)
X_train_reconstructed[labeled_indices] = X_labeled
X_train_reconstructed[unlabeled_indices] = X_unlabeled

# Same scatter for the labels.
y_train_reconstructed = np.zeros(train_split_size, dtype=y_labeled.dtype)
y_train_reconstructed[labeled_indices] = y_labeled
y_train_reconstructed[unlabeled_indices] = y_unlabeled

# 1 marks a position that came from the labelled subset, 0 everything else.
supervision_mask = np.zeros(train_split_size, dtype=np.int8)
supervision_mask[labeled_indices] = 1

print(f"\n‚úÖ Reconstructed training data:")
print(f"   Shape: {X_train_reconstructed.shape}")
print(f"   Supervised positions: {supervision_mask.sum():,} ({supervision_mask.sum()/train_split_size*100:.2f}%)")
print(f"   Unsupervised positions: {(1-supervision_mask).sum():,} ({(1-supervision_mask).sum()/train_split_size*100:.2f}%)")

# Sanity check against the stored label sequence (labels only; a mismatch is
# reported but does not stop the run).
y_train_original = np.load("y_train_seq.npy")
if not np.array_equal(y_train_reconstructed, y_train_original):
    print("‚ö†Ô∏è  WARNING: Reconstruction mismatch detected!")
else:
    print("‚úÖ VERIFICATION PASSED: Reconstruction matches original!")

print("=" * 70 + "\n")


# ============================================================================
# STEP 2: QR-DQN Environment (Chronological Sequential)
# ============================================================================

class QRDQNAEEnvChronological(gym.Env):
    """Sequential labelling environment over chronologically ordered windows.

    At every step the agent labels the current window (0 = normal,
    1 = anomaly) and the environment advances to the next index, wrapping
    around at the end of ``X_train``.

    Observation: the precomputed embedding of the current window with its
    normalised auto-encoder reconstruction error appended (float32).

    Reward (sum clipped to [-5, 5]):
      * external -- only where ``supervision_mask`` is 1: +1.0 when the
        action equals the label, -0.5 otherwise; 0.0 on unsupervised windows.
      * intrinsic -- only for action 1: ``lambda_int * err``, minus 0.5 when
        ``err < 0.05``; 0.0 for action 0.

    An episode lasts ``max_steps`` steps. Reaching that limit is reported as
    *truncation* (``terminated`` is always False), because the cut-off is a
    time limit rather than a terminal state of the task.

    Args:
        X_train: windows, indexable by integer position.
        y_train: label per window.
        supervision_mask: 1 where the label may be used for reward, else 0.
        ae_model: auto-encoder called as ``ae_model(x)``; its device is taken
            from its first parameter.
        embeddings: 2-D array, one embedding row per window.
        lambda_int: weight of the intrinsic reward.
        max_steps: episode length in steps.

    Raises:
        ValueError: if ``X_train`` is empty or another input has fewer
            entries than ``X_train``.

    Note:
        ``_ae_error`` reads the module-level ``ae_mean`` and ``ae_std``.
    """

    # (window index, normalised error) of the most recently built observation.
    _cached_err = None

    def __init__(self, X_train, y_train, supervision_mask, ae_model,
                 embeddings, lambda_int=1.0, max_steps=200):
        super().__init__()

        # Fail early instead of hitting a modulo-by-zero or IndexError mid-run.
        n_windows = len(X_train)
        if n_windows == 0:
            raise ValueError("X_train must contain at least one window")
        for name, arr in (("y_train", y_train),
                          ("supervision_mask", supervision_mask),
                          ("embeddings", embeddings)):
            if len(arr) < n_windows:
                raise ValueError(
                    f"{name} has {len(arr)} entries but X_train has {n_windows} windows"
                )

        self.X_train = X_train
        self.y_train = y_train
        self.supervision_mask = supervision_mask  # 1=supervised, 0=unsupervised
        self.ae_model = ae_model
        self.device = next(ae_model.parameters()).device
        self.lambda_int = lambda_int
        self.max_steps = max_steps
        self.steps = 0
        self.idx = 0
        self.embeddings = embeddings
        self._cached_err = None

        emb_dim = self.embeddings.shape[1]
        # Embedding plus one extra slot for the reconstruction error.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(emb_dim + 1,),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)  # 0 = normal, 1 = anomaly

        # Statistics
        n_supervised = np.sum(supervision_mask)
        n_unsupervised = len(supervision_mask) - n_supervised
        print(f"\nüîπ Environment initialized:")
        print(f"   Total windows: {len(X_train):,}")
        print(f"   Supervised: {n_supervised:,} ({n_supervised/len(X_train)*100:.2f}%)")
        print(f"   Unsupervised: {n_unsupervised:,} ({n_unsupervised/len(X_train)*100:.2f}%)")

    def _ae_error(self, x):
        """Return the normalised reconstruction error of one window.

        The mean-squared reconstruction error is standardised with the
        module-level ``ae_mean`` / ``ae_std``, squashed with ``tanh`` and
        clamped below at 0, so the result lies in [0, 1).
        """
        with torch.no_grad():
            x = torch.tensor(x, dtype=torch.float32).unsqueeze(0).to(self.device)
            recon = self.ae_model(x)
            loss = F.mse_loss(recon, x, reduction="mean").item()
        norm = np.tanh((loss - ae_mean) / (ae_std + 1e-8))
        return max(0.0, float(norm))

    def _current_error(self):
        """Return the error of window ``self.idx``, computing it at most once.

        The value computed while building an observation is reused by the
        following ``step`` call, which otherwise would run the auto-encoder a
        second time on the same window. Only the latest index is remembered.
        """
        cached = self._cached_err
        if cached is not None and cached[0] == self.idx:
            return cached[1]
        err = self._ae_error(self.X_train[self.idx])
        self._cached_err = (self.idx, err)
        return err

    def _observation(self):
        """Build the float32 observation for window ``self.idx``."""
        err = np.array([self._current_error()])
        return np.concatenate([self.embeddings[self.idx], err]).astype(np.float32)

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        The episode starts at index 0, or at a uniformly random index with
        probability 0.1. The draw uses the environment's own ``np_random``
        generator, so passing ``seed`` makes the start position reproducible.
        """
        super().reset(seed=seed)
        self.steps = 0

        rng = self.np_random
        if rng.random() < 0.1:  # 10% random start for variety
            self.idx = int(rng.integers(0, len(self.X_train)))
        else:
            self.idx = 0

        return self._observation(), {}

    def step(self, action):
        """Score ``action`` on the current window and advance to the next one.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``obs``
            describes the *next* window, ``terminated`` is always False and
            ``truncated`` becomes True once ``max_steps`` steps were taken.
        """
        self.steps += 1

        true_label = self.y_train[self.idx]
        is_supervised = self.supervision_mask[self.idx]
        err = self._current_error()

        # External reward: only where a ground-truth label may be used.
        if is_supervised == 1:
            external_reward = 1.0 if action == true_label else -0.5
        else:
            external_reward = 0.0

        # Intrinsic reward: pays anomaly predictions in proportion to the
        # reconstruction error and penalises them when the error is tiny.
        if action == 1:
            intrinsic_reward = self.lambda_int * err
            if err < 0.05:
                intrinsic_reward -= 0.5
        else:
            intrinsic_reward = 0.0

        reward = float(np.clip(external_reward + intrinsic_reward, -5, 5))

        # Move on chronologically, wrapping at the end of the data.
        self.idx = (self.idx + 1) % len(self.X_train)
        obs = self._observation()

        # The step budget is a time limit, not a terminal state.
        truncated = self.steps >= self.max_steps

        return obs, reward, False, truncated, {}


# ============================================================================
# STEP 3: Precompute embeddings
# ============================================================================

def compute_embeddings(X, batch_size=1024):
    """Encode all windows in ``X`` with the module-level ``ae_model``.

    The windows are pushed through ``ae_model.encode`` in chunks of
    ``batch_size`` so that the whole dataset never has to sit on ``device``
    in a single forward pass.

    Args:
        X: sequence/array of windows; sliced along its first axis.
        batch_size: number of windows encoded per forward pass (>= 1).

    Returns:
        NumPy array with one embedding row per window, in input order.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    chunks = []
    with torch.no_grad():
        for start in range(0, len(X), batch_size):
            batch = torch.tensor(X[start:start + batch_size], dtype=torch.float32).to(device)
            chunks.append(ae_model.encode(batch).cpu().numpy())

        if not chunks:
            # Empty input: hand it to the encoder unchanged, exactly as a
            # single un-batched call would.
            tensors = torch.tensor(X, dtype=torch.float32).to(device)
            return ae_model.encode(tensors).cpu().numpy()

    return np.concatenate(chunks, axis=0)

print("üîπ Precomputing embeddings for reconstructed data...")
embeddings_train = compute_embeddings(X_train_reconstructed)
print(f"‚úÖ Embeddings computed: {embeddings_train.shape}")


# ============================================================================
# STEP 4: Create VecEnv + QR-DQN
# ============================================================================

def make_env(rank):
    """Return a zero-argument factory for one monitored environment.

    The factory builds a ``QRDQNAEEnvChronological`` from the module-level
    reconstructed data, mask, auto-encoder and embeddings, and wraps it in a
    ``Monitor`` that logs to ``logs/env_<rank>``.
    """
    log_path = f"logs/env_{rank}"

    def _build():
        # Globals are looked up when the factory is called, not when it is made.
        return Monitor(
            QRDQNAEEnvChronological(
                X_train=X_train_reconstructed,
                y_train=y_train_reconstructed,
                supervision_mask=supervision_mask,
                ae_model=ae_model,
                embeddings=embeddings_train,
                lambda_int=0.8,
                max_steps=200,
            ),
            log_path,
        )

    return _build

# One environment behind a DummyVecEnv, wrapped in VecNormalize with
# observation and reward normalisation enabled (observations clipped at 10).
num_envs = 1
vec_env = DummyVecEnv(list(map(make_env, range(num_envs))))
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True, clip_obs=10.0)

# Hidden-layer sizes handed to the policy network.
policy_kwargs = {"net_arch": [512, 256]}

model = QRDQN(
    "MlpPolicy",
    vec_env,
    policy_kwargs=policy_kwargs,
    tensorboard_log="logs_qrdqn_ae_chronological/",
    verbose=1,
    # replay buffer and update schedule
    buffer_size=150_000,
    learning_starts=5_000,
    train_freq=4,
    target_update_interval=10_000,
    tau=1.0,
    # optimisation
    learning_rate=1e-4,
    batch_size=64,
    gamma=0.99,
    # epsilon schedule: from 1.0 down to 0.02
    exploration_fraction=0.2,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.02,
)

print("\n" + "=" * 70)
print("üöÄ Starting QR-DQN training with chronological data")
print("=" * 70 + "\n")


# ============================================================================
# STEP 5: Train QR-DQN
# ============================================================================

# Train, then persist both the agent and the VecNormalize wrapper state.
model.learn(total_timesteps=200_000)
model.save("qrdqn_ae_chronological")
vec_env.save("vec_normalize_qrdqn_chronological.pkl")

print("\n‚úÖ Training complete! Chronological QR-DQN model saved.")
print("   Model: qrdqn_ae_chronological.zip")
print("   VecNormalize: vec_normalize_qrdqn_chronological.pkl")

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
import gym
from gym import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor

# Restore the pretrained ConvAE from "convAE_best.pth" and put it in
# inference mode with gradients disabled, then read the reconstruction-error
# statistics used to normalise AE errors.
# NOTE(review): ConvAE and X_unlabeled are not created in this cell before
# this point; they must already exist in the running session.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

ae_model = ConvAE(
    feat_dim=X_unlabeled.shape[2],
    seq_len=X_unlabeled.shape[1],
    latent_dim=128,
)
ae_model.load_state_dict(torch.load("convAE_best.pth", map_location=device))
ae_model.to(device)
ae_model.eval()
# The autoencoder is only used for inference from here on.
for p in ae_model.parameters():
    p.requires_grad = False
print(f"‚úÖ Loaded ConvAE on {device} (frozen for DQN).")

# "mean_err" / "std_err" are the two entries read from the stats archive.
stats = np.load("convAE_stats.npz")
ae_mean = stats["mean_err"]
ae_std = stats["std_err"]
print(f"‚úÖ AE normalization loaded: mean={ae_mean:.6f}, std={ae_std:.6f}")


# ============================================================================
# STEP 1: Reconstruct chronological order
# ============================================================================
print("\n" + "=" * 70)
print("üîÑ RECONSTRUCTING CHRONOLOGICAL ORDER")
print("=" * 70)

# Windows and labels of the two subsets, as stored on disk.
X_labeled = np.load("X_labeled.npy")
y_labeled = np.load("y_labeled.npy")
X_unlabeled = np.load("X_unlabeled.npy")
y_unlabeled = np.load("y_unlabeled.npy")

# Target positions of each subset inside the full training array, and the
# length of that array (first element of the saved size file).
labeled_indices = np.load("labeled_indices.npy")
unlabeled_indices = np.load("unlabeled_indices.npy")
train_split_size = np.load("train_split_size.npy")[0]

print(f"üìä Data loaded:")
print(f"   Labeled samples: {len(X_labeled):,}")
print(f"   Unlabeled samples: {len(X_unlabeled):,}")
print(f"   Original training size: {train_split_size:,}")

# Scatter the windows back to their stored positions. Positions named by
# neither index array keep the zero fill.
X_train_reconstructed = np.zeros(
    (train_split_size,) + X_labeled.shape[1:], dtype=X_labeled.dtype
)
X_train_reconstructed[labeled_indices] = X_labeled
X_train_reconstructed[unlabeled_indices] = X_unlabeled

# Same scatter for the labels.
y_train_reconstructed = np.zeros(train_split_size, dtype=y_labeled.dtype)
y_train_reconstructed[labeled_indices] = y_labeled
y_train_reconstructed[unlabeled_indices] = y_unlabeled

# 1 marks a position that came from the labelled subset, 0 everything else.
supervision_mask = np.zeros(train_split_size, dtype=np.int8)
supervision_mask[labeled_indices] = 1

print(f"\n‚úÖ Reconstructed training data:")
print(f"   Shape: {X_train_reconstructed.shape}")
print(f"   Supervised positions: {supervision_mask.sum():,} ({supervision_mask.sum()/train_split_size*100:.2f}%)")
print(f"   Unsupervised positions: {(1-supervision_mask).sum():,} ({(1-supervision_mask).sum()/train_split_size*100:.2f}%)")

# Sanity check against the stored label sequence (labels only; a mismatch is
# reported but does not stop the run).
y_train_original = np.load("y_train_seq.npy")
if not np.array_equal(y_train_reconstructed, y_train_original):
    print("‚ö†Ô∏è  WARNING: Reconstruction mismatch detected!")
else:
    print("‚úÖ VERIFICATION PASSED: Reconstruction matches original!")

print("=" * 70 + "\n")


# ============================================================================
# STEP 2: DQN Environment (Chronological Sequential)
# ============================================================================

class DQNAEEnvChronological(gym.Env):
    """Sequential labelling environment over chronologically ordered windows.

    At every step the agent labels the current window (0 = normal,
    1 = anomaly) and the environment advances to the next index, wrapping
    around at the end of ``X_train``.

    Observation: the precomputed embedding of the current window with its
    normalised auto-encoder reconstruction error appended (float32).

    Reward (sum clipped to [-5, 5]):
      * external -- only where ``supervision_mask`` is 1: +1.0 when the
        action equals the label, -0.5 otherwise; 0.0 on unsupervised windows.
      * intrinsic -- only for action 1: ``lambda_int * err``, minus 0.5 when
        ``err < 0.05``; 0.0 for action 0.

    An episode lasts ``max_steps`` steps. Reaching that limit is reported as
    *truncation* (``terminated`` is always False), because the cut-off is a
    time limit rather than a terminal state of the task.

    Args:
        X_train: windows, indexable by integer position.
        y_train: label per window.
        supervision_mask: 1 where the label may be used for reward, else 0.
        ae_model: auto-encoder called as ``ae_model(x)``; its device is taken
            from its first parameter.
        embeddings: 2-D array, one embedding row per window.
        lambda_int: weight of the intrinsic reward.
        max_steps: episode length in steps.

    Raises:
        ValueError: if ``X_train`` is empty or another input has fewer
            entries than ``X_train``.

    Note:
        ``_ae_error`` reads the module-level ``ae_mean`` and ``ae_std``.
    """

    # (window index, normalised error) of the most recently built observation.
    _cached_err = None

    def __init__(self, X_train, y_train, supervision_mask, ae_model,
                 embeddings, lambda_int=1.0, max_steps=200):
        super().__init__()

        # Fail early instead of hitting a modulo-by-zero or IndexError mid-run.
        n_windows = len(X_train)
        if n_windows == 0:
            raise ValueError("X_train must contain at least one window")
        for name, arr in (("y_train", y_train),
                          ("supervision_mask", supervision_mask),
                          ("embeddings", embeddings)):
            if len(arr) < n_windows:
                raise ValueError(
                    f"{name} has {len(arr)} entries but X_train has {n_windows} windows"
                )

        self.X_train = X_train
        self.y_train = y_train
        self.supervision_mask = supervision_mask  # 1=supervised, 0=unsupervised
        self.ae_model = ae_model
        self.device = next(ae_model.parameters()).device
        self.lambda_int = lambda_int
        self.max_steps = max_steps
        self.steps = 0
        self.idx = 0
        self.embeddings = embeddings
        self._cached_err = None

        emb_dim = self.embeddings.shape[1]
        # Embedding plus one extra slot for the reconstruction error.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(emb_dim + 1,),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)  # 0 = normal, 1 = anomaly

        # Statistics
        n_supervised = np.sum(supervision_mask)
        n_unsupervised = len(supervision_mask) - n_supervised
        print(f"\nüîπ Environment initialized:")
        print(f"   Total windows: {len(X_train):,}")
        print(f"   Supervised: {n_supervised:,} ({n_supervised/len(X_train)*100:.2f}%)")
        print(f"   Unsupervised: {n_unsupervised:,} ({n_unsupervised/len(X_train)*100:.2f}%)")

    def _ae_error(self, x):
        """Return the normalised reconstruction error of one window.

        The mean-squared reconstruction error is standardised with the
        module-level ``ae_mean`` / ``ae_std``, squashed with ``tanh`` and
        clamped below at 0, so the result lies in [0, 1).
        """
        with torch.no_grad():
            x = torch.tensor(x, dtype=torch.float32).unsqueeze(0).to(self.device)
            recon = self.ae_model(x)
            loss = F.mse_loss(recon, x, reduction="mean").item()
        norm = np.tanh((loss - ae_mean) / (ae_std + 1e-8))
        return max(0.0, float(norm))

    def _current_error(self):
        """Return the error of window ``self.idx``, computing it at most once.

        The value computed while building an observation is reused by the
        following ``step`` call, which otherwise would run the auto-encoder a
        second time on the same window. Only the latest index is remembered.
        """
        cached = self._cached_err
        if cached is not None and cached[0] == self.idx:
            return cached[1]
        err = self._ae_error(self.X_train[self.idx])
        self._cached_err = (self.idx, err)
        return err

    def _observation(self):
        """Build the float32 observation for window ``self.idx``."""
        err = np.array([self._current_error()])
        return np.concatenate([self.embeddings[self.idx], err]).astype(np.float32)

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        The episode starts at index 0, or at a uniformly random index with
        probability 0.1. The draw uses the environment's own ``np_random``
        generator, so passing ``seed`` makes the start position reproducible.

        ``seed`` defaults to ``None`` (it used to default to 45): with a
        fixed default every argument-less reset would reseed the generator
        and reproduce the exact same start decision each episode.
        """
        super().reset(seed=seed)
        self.steps = 0

        rng = self.np_random
        if rng.random() < 0.1:  # 10% random start for variety
            self.idx = int(rng.integers(0, len(self.X_train)))
        else:
            self.idx = 0

        return self._observation(), {}

    def step(self, action):
        """Score ``action`` on the current window and advance to the next one.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``obs``
            describes the *next* window, ``terminated`` is always False and
            ``truncated`` becomes True once ``max_steps`` steps were taken.
        """
        self.steps += 1

        true_label = self.y_train[self.idx]
        is_supervised = self.supervision_mask[self.idx]
        err = self._current_error()

        # External reward: only where a ground-truth label may be used.
        if is_supervised == 1:
            external_reward = 1.0 if action == true_label else -0.5
        else:
            external_reward = 0.0

        # Intrinsic reward: pays anomaly predictions in proportion to the
        # reconstruction error and penalises them when the error is tiny.
        if action == 1:
            intrinsic_reward = self.lambda_int * err
            if err < 0.05:
                intrinsic_reward -= 0.5
        else:
            intrinsic_reward = 0.0

        reward = float(np.clip(external_reward + intrinsic_reward, -5, 5))

        # Move on chronologically, wrapping at the end of the data.
        self.idx = (self.idx + 1) % len(self.X_train)
        obs = self._observation()

        # The step budget is a time limit, not a terminal state.
        truncated = self.steps >= self.max_steps

        return obs, reward, False, truncated, {}


# ============================================================================
# STEP 3: Precompute embeddings
# ============================================================================

def compute_embeddings(X, batch_size=1024):
    """Encode all windows in ``X`` with the module-level ``ae_model``.

    The windows are pushed through ``ae_model.encode`` in chunks of
    ``batch_size`` so that the whole dataset never has to sit on ``device``
    in a single forward pass.

    Args:
        X: sequence/array of windows; sliced along its first axis.
        batch_size: number of windows encoded per forward pass (>= 1).

    Returns:
        NumPy array with one embedding row per window, in input order.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    chunks = []
    with torch.no_grad():
        for start in range(0, len(X), batch_size):
            batch = torch.tensor(X[start:start + batch_size], dtype=torch.float32).to(device)
            chunks.append(ae_model.encode(batch).cpu().numpy())

        if not chunks:
            # Empty input: hand it to the encoder unchanged, exactly as a
            # single un-batched call would.
            tensors = torch.tensor(X, dtype=torch.float32).to(device)
            return ae_model.encode(tensors).cpu().numpy()

    return np.concatenate(chunks, axis=0)

print("üîπ Precomputing embeddings for reconstructed data...")
embeddings_train = compute_embeddings(X_train_reconstructed)
print(f"‚úÖ Embeddings computed: {embeddings_train.shape}")


# ============================================================================
# STEP 4: Create VecEnv + DQN
# ============================================================================

def make_env(rank):
    """Return a zero-argument factory for one monitored environment.

    The factory builds a ``DQNAEEnvChronological`` from the module-level
    reconstructed data, mask, auto-encoder and embeddings, and wraps it in a
    ``Monitor`` that logs to ``logs/env_<rank>``.
    """
    log_path = f"logs/env_{rank}"

    def _build():
        # Globals are looked up when the factory is called, not when it is made.
        return Monitor(
            DQNAEEnvChronological(
                X_train=X_train_reconstructed,
                y_train=y_train_reconstructed,
                supervision_mask=supervision_mask,
                ae_model=ae_model,
                embeddings=embeddings_train,
                lambda_int=0.8,
                max_steps=200,
            ),
            log_path,
        )

    return _build

# One environment behind a DummyVecEnv, wrapped in VecNormalize with
# observation and reward normalisation enabled (observations clipped at 10).
num_envs = 1
vec_env = DummyVecEnv(list(map(make_env, range(num_envs))))
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True, clip_obs=10.0)

# Hidden-layer sizes handed to the policy (Q-network).
policy_kwargs = {"net_arch": [256, 256]}

model = DQN(
    "MlpPolicy",
    vec_env,
    policy_kwargs=policy_kwargs,
    tensorboard_log="logs_dqn_ae_chronological/",
    verbose=1,
    # replay buffer and update schedule
    buffer_size=150_000,
    learning_starts=5_000,
    train_freq=4,
    target_update_interval=1_000,
    tau=1.0,
    # optimisation
    learning_rate=1e-4,
    batch_size=64,
    gamma=0.99,
    # epsilon schedule: from 1.0 down to 0.05
    exploration_fraction=0.1,
    exploration_initial_eps=1.0,
    exploration_final_eps=0.05,
)

print("\n" + "=" * 70)
print("üöÄ Starting DQN training with chronological data")

print("=" * 70 + "\n")


# ============================================================================
# STEP 5: Train DQN
# ============================================================================

# Train, then persist both the agent and the VecNormalize wrapper state.
model.learn(total_timesteps=500_000)
model.save("dqn_ae_chronological")
vec_env.save("vec_normalize_dqn_chronological.pkl")

print("\n‚úÖ Training complete! Chronological DQN model saved.")
print("   Model: dqn_ae_chronological.zip")
print("   VecNormalize: vec_normalize_dqn_chronological.pkl")

In [None]:
import numpy as np
import torch
import torch.nn.functional as F
import gym
from gym import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor

# Restore the pretrained ConvAE from "convAE_best.pth" and put it in
# inference mode with gradients disabled, then read the reconstruction-error
# statistics used to normalise AE errors.
# NOTE(review): ConvAE and X_unlabeled are not created in this cell before
# this point; they must already exist in the running session.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

ae_model = ConvAE(
    feat_dim=X_unlabeled.shape[2],
    seq_len=X_unlabeled.shape[1],
    latent_dim=128,
)
ae_model.load_state_dict(torch.load("convAE_best.pth", map_location=device))
ae_model.to(device)
ae_model.eval()
# The autoencoder is only used for inference from here on.
for p in ae_model.parameters():
    p.requires_grad = False
print(f"‚úÖ Loaded ConvAE on {device} (frozen for Standard PPO).")

# "mean_err" / "std_err" are the two entries read from the stats archive.
stats = np.load("convAE_stats.npz")
ae_mean = stats["mean_err"]
ae_std = stats["std_err"]
print(f"‚úÖ AE normalization loaded: mean={ae_mean:.6f}, std={ae_std:.6f}")


# ============================================================================
# STEP 1: Reconstruct chronological order
# ============================================================================
print("\n" + "=" * 70)
print("üîÑ RECONSTRUCTING CHRONOLOGICAL ORDER")
print("=" * 70)

# Windows and labels of the two subsets, as stored on disk.
X_labeled = np.load("X_labeled.npy")
y_labeled = np.load("y_labeled.npy")
X_unlabeled = np.load("X_unlabeled.npy")
y_unlabeled = np.load("y_unlabeled.npy")

# Target positions of each subset inside the full training array, and the
# length of that array (first element of the saved size file).
labeled_indices = np.load("labeled_indices.npy")
unlabeled_indices = np.load("unlabeled_indices.npy")
train_split_size = np.load("train_split_size.npy")[0]

print(f"üìä Data loaded:")
print(f"   Labeled samples: {len(X_labeled):,}")
print(f"   Unlabeled samples: {len(X_unlabeled):,}")
print(f"   Original training size: {train_split_size:,}")

# Scatter the windows back to their stored positions. Positions named by
# neither index array keep the zero fill.
X_train_reconstructed = np.zeros(
    (train_split_size,) + X_labeled.shape[1:], dtype=X_labeled.dtype
)
X_train_reconstructed[labeled_indices] = X_labeled
X_train_reconstructed[unlabeled_indices] = X_unlabeled

# Same scatter for the labels.
y_train_reconstructed = np.zeros(train_split_size, dtype=y_labeled.dtype)
y_train_reconstructed[labeled_indices] = y_labeled
y_train_reconstructed[unlabeled_indices] = y_unlabeled

# 1 marks a position that came from the labelled subset, 0 everything else.
supervision_mask = np.zeros(train_split_size, dtype=np.int8)
supervision_mask[labeled_indices] = 1

print(f"\n‚úÖ Reconstructed training data:")
print(f"   Shape: {X_train_reconstructed.shape}")
print(f"   Supervised positions: {supervision_mask.sum():,} ({supervision_mask.sum()/train_split_size*100:.2f}%)")
print(f"   Unsupervised positions: {(1-supervision_mask).sum():,} ({(1-supervision_mask).sum()/train_split_size*100:.2f}%)")

# Sanity check against the stored label sequence (labels only; a mismatch is
# reported but does not stop the run).
y_train_original = np.load("y_train_seq.npy")
if not np.array_equal(y_train_reconstructed, y_train_original):
    print("‚ö†Ô∏è  WARNING: Reconstruction mismatch detected!")
else:
    print("‚úÖ VERIFICATION PASSED: Reconstruction matches original!")

print("=" * 70 + "\n")


# ============================================================================
# STEP 2: PPO Environment (Chronological Sequential)
# ============================================================================

class PPOAEEnvChronological(gym.Env):
    """
    Sequential environment that walks the training windows in chronological order.

    Each observation is the precomputed embedding of the current window with
    the normalized autoencoder reconstruction error appended as its last
    element. The agent answers 0 (normal) or 1 (anomaly) for the current
    window and is then moved to the next one; the index wraps around at the
    end of the data.

    Reward = external + intrinsic, clipped to [-5, 5]:
      * external: +1.0 / -0.5 for a correct / wrong answer, only where
        ``supervision_mask`` is 1; 0.0 elsewhere.
      * intrinsic: ``lambda_int * err`` when the action is 1, minus 0.5 when
        ``err < 0.05``; 0.0 when the action is 0.

    Args:
        X_train: Windows, indexed by position.
        y_train: Binary label per window (same indexing as ``X_train``).
        supervision_mask: 1 where the label may be used for the reward.
        ae_model: Autoencoder used for the reconstruction error; its device
            is taken from its first parameter.
        embeddings: 2-D array of per-window embeddings (row i <-> window i).
        lambda_int: Weight of the intrinsic (reconstruction-error) reward.
        max_steps: Number of steps after which an episode ends.

    Note:
        ``_ae_error`` reads the module-level ``ae_mean`` and ``ae_std``.
    """

    def __init__(self, X_train, y_train, supervision_mask, ae_model,
                 embeddings, lambda_int=1.0, max_steps=200):
        super().__init__()
        self.X_train = X_train
        self.y_train = y_train
        self.supervision_mask = supervision_mask
        self.ae_model = ae_model
        self.device = next(ae_model.parameters()).device
        self.lambda_int = lambda_int
        self.max_steps = max_steps
        self.steps = 0
        self.idx = 0
        self.embeddings = embeddings

        # Observation = embedding vector + one scalar for the AE error.
        emb_dim = self.embeddings.shape[1]
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(emb_dim + 1,),
            dtype=np.float32
        )
        self.action_space = spaces.Discrete(2)  # 0 = normal, 1 = anomaly

        # Statistics
        n_supervised = np.sum(supervision_mask)
        n_unsupervised = len(supervision_mask) - n_supervised
        print(f"\nüîπ Environment initialized:")
        print(f"   Total windows: {len(X_train):,}")
        print(f"   Supervised: {n_supervised:,} ({n_supervised/len(X_train)*100:.2f}%)")
        print(f"   Unsupervised: {n_unsupervised:,} ({n_unsupervised/len(X_train)*100:.2f}%)")

    def _ae_error(self, x):
        """Return the reconstruction error of window ``x``, squashed to [0, 1).

        The mean-squared reconstruction error is standardized with the
        module-level ``ae_mean`` / ``ae_std``, passed through tanh, and
        negative values are floored at 0.
        """
        with torch.no_grad():
            x = torch.tensor(x, dtype=torch.float32).unsqueeze(0).to(self.device)
            recon = self.ae_model(x)
            loss = F.mse_loss(recon, x, reduction="mean").item()
        norm = np.tanh((loss - ae_mean) / (ae_std + 1e-8))
        return float(max(0.0, norm))

    def _observation(self):
        """Build the observation for the window at ``self.idx``."""
        emb = self.embeddings[self.idx]
        err = np.array([self._ae_error(self.X_train[self.idx])])
        return np.concatenate([emb, err]).astype(np.float32)

    def reset(self, *, seed=None, options=None):
        """Start a new episode at index 0, or at a random index 10% of the time.

        Returns:
            ``(observation, info)`` with an empty info dict.
        """
        super().reset(seed=seed)
        self.steps = 0

        # Draw from the environment's own generator (seeded by super().reset)
        # rather than the global np.random, so that reset(seed=...) actually
        # controls the start position.
        rng = self.np_random
        if rng.random() < 0.1:  # 10% random start for variety
            self.idx = int(rng.integers(0, len(self.X_train)))
        else:
            self.idx = 0

        return self._observation(), {}

    def step(self, action):
        """Score ``action`` on the current window, then advance one window.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``obs``
            describes the *next* window and ``truncated`` is always False.
        """
        self.steps += 1

        # Current window
        x = self.X_train[self.idx]
        true_label = self.y_train[self.idx]
        is_supervised = self.supervision_mask[self.idx]

        err = self._ae_error(x)

        # External reward: only where a ground-truth label may be used.
        if is_supervised == 1:
            external_reward = 1.0 if action == true_label else -0.5
        else:
            external_reward = 0.0

        # Intrinsic reward: favor "anomaly" when the reconstruction error is
        # high, and penalize it when the error is close to zero.
        if action == 1:
            intrinsic_reward = self.lambda_int * err
            if err < 0.05:
                intrinsic_reward -= 0.5
        else:
            intrinsic_reward = 0.0

        reward = np.clip(external_reward + intrinsic_reward, -5, 5)

        # Advance chronologically, wrapping around at the end of the data.
        self.idx = (self.idx + 1) % len(self.X_train)
        obs = self._observation()

        # NOTE(review): the step budget is reported through `terminated`;
        # Gym's step API reserves `truncated` for time limits — confirm that
        # this is intended before changing it (it affects value bootstrapping).
        done = self.steps >= self.max_steps

        return obs, reward, done, False, {}


# ============================================================================
# STEP 3: Precompute embeddings
# ============================================================================

def compute_embeddings(X, batch_size=4096):
    """Compute encoder embeddings for all windows.

    Uses the module-level ``ae_model`` (its ``encode`` method) and ``device``.
    The windows are encoded in chunks so that only ``batch_size`` windows are
    resident on the device at a time, instead of the whole dataset.

    Args:
        X: Array of windows; the first axis indexes windows.
        batch_size: Maximum number of windows per forward pass. ``None``
            encodes everything in a single pass (the previous behavior).

    Returns:
        NumPy array of embeddings, one row per window, in input order.

    Raises:
        ValueError: If ``batch_size`` is not ``None`` and is smaller than 1.
    """
    if batch_size is not None and batch_size < 1:
        raise ValueError(f"batch_size must be >= 1 or None, got {batch_size}")

    n_windows = len(X)
    with torch.no_grad():
        # Small inputs (and batch_size=None) keep the original single pass.
        if batch_size is None or n_windows <= batch_size:
            tensors = torch.tensor(X, dtype=torch.float32).to(device)
            return ae_model.encode(tensors).cpu().numpy()

        chunks = []
        for start in range(0, n_windows, batch_size):
            batch = torch.tensor(X[start:start + batch_size], dtype=torch.float32).to(device)
            chunks.append(ae_model.encode(batch).cpu().numpy())
    return np.concatenate(chunks, axis=0)

# Encode every reconstructed training window once up front; the environment
# indexes into this array instead of calling the encoder at each step.
print("üîπ Precomputing embeddings for reconstructed data...")
embeddings_train = compute_embeddings(X_train_reconstructed)
print(f"‚úÖ Embeddings computed: {embeddings_train.shape}")


# ============================================================================
# STEP 4: Create VecEnv + Standard PPO
# ============================================================================

def make_env(rank):
    """Return a zero-argument factory that builds one monitored environment.

    The factory creates a ``PPOAEEnvChronological`` over the module-level
    reconstructed training data and wraps it in a ``Monitor`` that logs to
    ``logs/env_<rank>``. Nothing is constructed until the factory is called.
    """
    monitor_path = f"logs/env_{rank}"

    def _init():
        chrono_env = PPOAEEnvChronological(
            X_train=X_train_reconstructed,
            y_train=y_train_reconstructed,
            supervision_mask=supervision_mask,
            ae_model=ae_model,
            embeddings=embeddings_train,
            lambda_int=0.8,
            max_steps=200,
        )
        return Monitor(chrono_env, monitor_path)

    return _init

# Single environment, wrapped so that observations and rewards are normalized
# with running statistics (observations clipped to +/-10 after normalization).
num_envs = 1
vec_env = DummyVecEnv([make_env(i) for i in range(num_envs)])
vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True, clip_obs=10.)

# Standard MLP policy (no LSTM): separate layer stacks for actor and critic.
policy_kwargs = dict(
    net_arch=dict(pi=[256, 256], vf=[512, 512, 256])  # actor: 2 hidden layers; critic: 3 hidden layers
)

# NOTE(review): `PPO` must already be in scope (e.g. imported from
# stable_baselines3); the import is not visible in this part of the file.
model = PPO(
    "MlpPolicy",  # Standard MLP policy (non-recurrent)
    vec_env,
    verbose=1,
    learning_rate=1e-4,
    n_steps=64,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    ent_coef=0.02,
    clip_range=0.2,
    policy_kwargs=policy_kwargs,
    tensorboard_log="logs_standard_ppo_ae_chronological/"
)

print("\n" + "="*70)
print("üöÄ Starting Standard PPO training with chronological data")
print("="*70 + "\n")


# ============================================================================
# STEP 5: Train Standard PPO
# ============================================================================

# Train, then persist both the policy and the VecNormalize statistics; the
# statistics are needed to normalize observations the same way at inference.
model.learn(total_timesteps=500_000)
model.save("standard_ppo_ae_chronological")
vec_env.save("vec_normalize_standard_ppo_chronological.pkl")

print("\n‚úÖ Training complete! Chronological Standard PPO model saved.")
print("   Model: standard_ppo_ae_chronological.zip")
print("   VecNormalize: vec_normalize_standard_ppo_chronological.pkl")

In [None]:
# Recurrent PPO: evaluation on the held-out test windows (the last 20% of the
# windows, per the chronological split in the preprocessing cell).
print("üìä Evaluating on test set...")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve
)

from typing import NamedTuple, Tuple
import torch
from torch import Tensor
import torch.nn.functional as F

class LSTMStates(NamedTuple):
    """Pair of LSTM states carried between inference steps.

    In the inference loop of this cell, ``pi`` is the state handed to
    ``model.policy.get_distribution`` and ``vf`` is the state handed to
    ``model.policy._process_sequence`` together with ``lstm_critic``.
    """
    # Each field is a 2-tuple of tensors — presumably (hidden, cell); confirm.
    pi: Tuple[Tensor, Tensor]
    vf: Tuple[Tensor, Tensor]


# Build one observation (embedding + normalized AE error) per test window
def make_obs_array(X_windows):
    """Turn windows into policy observations, one row per window.

    Each row is the flattened ``ae_model.encode`` output followed by one
    scalar: the mean-squared reconstruction error, standardized with the
    module-level ``ae_mean`` / ``ae_std``, passed through tanh and floored
    at 0. Relies on the module-level ``ae_model`` and ``device``.
    """
    rows = []
    with torch.no_grad():
        for window in X_windows:
            batch = torch.tensor(window, dtype=torch.float32).unsqueeze(0).to(device)
            latent = ae_model.encode(batch).cpu().numpy().flatten()
            mse = F.mse_loss(ae_model(batch), batch, reduction="mean").item()
            score = float(max(0.0, np.tanh((mse - ae_mean) / (ae_std + 1e-8))))
            rows.append(np.concatenate([latent, [score]]).astype(np.float32))
    return np.array(rows)

# NOTE(review): X_test / y_test are not loaded in this cell (the other
# evaluation cells load them from X_test.npy / y_test.npy); this assumes they
# already exist in the session.
obs_test = make_obs_array(X_test)

# Inference loop: the test windows are fed one at a time, in order, as a
# single episode, carrying the LSTM state from one window to the next.
y_pred = []
y_probs = []
lstm_states = None
episode_start = True

print("Running inference...")
for obs in obs_test:
    obs_tensor = torch.tensor(obs.reshape(1, -1), dtype=torch.float32).to(device)
    episode_start_tensor = torch.tensor([episode_start], dtype=torch.float32).to(device)

    # First iteration only: create all-zero initial states.
    # NOTE(review): both the actor and critic states are sized from
    # lstm_actor — assumes lstm_critic has the same layer count and hidden
    # size; TODO confirm.
    if lstm_states is None:
        n_layers = model.policy.lstm_actor.num_layers
        hidden_size = model.policy.lstm_actor.hidden_size
        batch_size = 1
        zeros = torch.zeros((n_layers, batch_size, hidden_size), device=device)
        lstm_states = LSTMStates(pi=(zeros, zeros), vf=(zeros, zeros))

    with torch.no_grad():
        distribution, new_lstm_states_pi = model.policy.get_distribution(
            obs_tensor, lstm_states.pi, episode_start_tensor
        )
        # latent_vf is not used below; this call is kept only to advance the
        # critic's LSTM state.
        latent_vf, new_lstm_states_vf = model.policy._process_sequence(
            obs_tensor, lstm_states.vf, episode_start_tensor, model.policy.lstm_critic
        )

    lstm_states = LSTMStates(pi=new_lstm_states_pi, vf=new_lstm_states_vf)

    act = distribution.mode()  # deterministic
    # Probability assigned to action 1 (anomaly); used as the ROC score.
    prob_anomaly = distribution.distribution.probs[0, 1].item()

    y_pred.append(int(act.item()))
    y_probs.append(prob_anomaly)

    # Only the very first window is flagged as an episode start.
    episode_start = False

y_pred = np.array(y_pred)
y_probs = np.array(y_probs)

# === Metrics ===
# Pin the label order so the matrix is always 2x2 ([[TN, FP], [FN, TP]]), even
# when only one class shows up in y_test / y_pred; without it the 4-way unpack
# below fails on a 1x1 matrix.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)
# ROC AUC is undefined when y_test holds a single class (roc_auc_score raises
# ValueError); report NaN in that case instead of aborting the evaluation.
auc = roc_auc_score(y_test, y_probs) if len(np.unique(y_test)) > 1 else float("nan")

# === Confusion-matrix heatmap ===
# Label the rows/columns; this requires cm to be 2x2.
cm_df = pd.DataFrame(
    cm,
    index=['True Normal (0)', 'True Anomaly (1)'],
    columns=['Pred Normal (0)', 'Pred Anomaly (1)']
)

plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', linewidths=1, linecolor='black',
            cbar_kws={'label': 'Count'}, annot_kws={"size": 20, "weight": "bold"})
plt.title('Confusion Matrix (Recurrent PPO)', fontsize=16, pad=20)
plt.ylabel('True Label', fontsize=14)
plt.xlabel('Predicted Label', fontsize=14)
plt.tight_layout()
plt.show()

# === Print Metrics ===
print(f"\nPrecision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-Score  : {f1:.4f}")
print(f"AUC       : {auc:.4f}")
print(f"TP: {tp:,} | FP: {fp:,} | FN: {fn:,} | TN: {tn:,}")

# === ROC curve ===
# Uses the anomaly probabilities (not the hard predictions) as scores.
fpr, tpr, _ = roc_curve(y_test, y_probs)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.4f})', linewidth=3, color='#2E86AB')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier', alpha=0.7)
plt.xlabel('False Positive Rate', fontsize=14)
plt.ylabel('True Positive Rate', fontsize=14)
plt.title('Receiver Operating Characteristic (ROC) Curve (Recurrent PPO)', fontsize=16, pad=20)
plt.legend(loc='lower right', fontsize=12)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

print("‚úÖ Evaluation complete.")


In [None]:
# Evaluation for Quantile Regression DQN (QR-DQN) on the test windows.
# NOTE(review): `model` is assumed to be the QR-DQN model at this point; it is
# not loaded or created in this cell.
print("üìä Evaluating QR-DQN...")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve
)


# Load the test windows and their window-level labels, as saved by the
# preprocessing cell.
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")  # binary: 0 = normal, 1 = anomaly


# Reuse observation creation function (same as training)
def make_obs_array(X_windows):
    """Build the observation matrix for a sequence of windows.

    For every window the observation is the flattened encoder output of the
    module-level ``ae_model`` plus one trailing value: tanh of the
    standardized mean-squared reconstruction error (using the module-level
    ``ae_mean`` / ``ae_std``), with negative values replaced by 0.
    """
    observations = []
    with torch.no_grad():
        for window in X_windows:
            inp = torch.tensor(window, dtype=torch.float32).unsqueeze(0).to(device)
            embedding = ae_model.encode(inp).cpu().numpy().flatten()
            reconstruction = ae_model(inp)
            mse = F.mse_loss(reconstruction, inp, reduction="mean").item()
            z = (mse - ae_mean) / (ae_std + 1e-8)
            clipped = float(max(0.0, np.tanh(z)))
            observations.append(
                np.concatenate([embedding, [clipped]]).astype(np.float32)
            )
    return np.array(observations)

print("Preparing test observations...")
obs_test = make_obs_array(X_test)

# === Inference for QR-DQN ===
# One window at a time: hard prediction from model.predict, plus a
# probability-like anomaly score derived from the quantile network.
y_pred = []
y_probs = []
deterministic = True

print("Running inference...")
for obs in obs_test:
    obs_reshaped = obs.reshape(1, -1)

    # Deterministic (greedy) action
    action, _ = model.predict(obs_reshaped, deterministic=deterministic)
    y_pred.append(int(action[0]))

    # Get probability-like score for anomaly (action=1)
    with torch.no_grad():
        obs_tensor = torch.as_tensor(obs_reshaped, dtype=torch.float32).to(device)

        # Quantile estimates from the policy's quantile network.
        # Expected shape [1, n_quantiles, 2] — TODO confirm against the
        # sb3-contrib QRDQN version in use.
        quantiles = model.policy.quantile_net(obs_tensor)

        # Average across the quantile axis -> approximate expected Q-value
        # per action, expected shape [1, 2].
        q_mean = quantiles.mean(dim=1)

        # Softmax over the two Q-values is a ranking score for the ROC curve,
        # not a calibrated probability.
        probs = F.softmax(q_mean, dim=1)
        prob_anomaly = probs[0, 1].item()                    # score for action 1 (anomaly)
        y_probs.append(prob_anomaly)

y_pred = np.array(y_pred)
y_probs = np.array(y_probs)

# === Metrics ===
# Pin the label order so the matrix is always 2x2 ([[TN, FP], [FN, TP]]), even
# when only one class shows up in y_test / y_pred; without it the 4-way unpack
# below fails on a 1x1 matrix.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)
# ROC AUC is undefined when y_test holds a single class (roc_auc_score raises
# ValueError); report NaN in that case instead of aborting the evaluation.
auc = roc_auc_score(y_test, y_probs) if len(np.unique(y_test)) > 1 else float("nan")

# === Confusion-matrix heatmap ===
# Label the rows/columns; this requires cm to be 2x2.
cm_df = pd.DataFrame(
    cm,
    index=['True Normal (0)', 'True Anomaly (1)'],
    columns=['Pred Normal (0)', 'Pred Anomaly (1)']
)

plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', linewidths=1, linecolor='black',
            cbar_kws={'label': 'Count'}, annot_kws={"size": 20, "weight": "bold"})
plt.title('Confusion Matrix (QR-DQN)', fontsize=16, pad=20)
plt.ylabel('True Label', fontsize=14)
plt.xlabel('Predicted Label', fontsize=14)
plt.tight_layout()
plt.show()

# === Print Metrics ===
print(f"\nPrecision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-Score  : {f1:.4f}")
print(f"AUC       : {auc:.4f}")
print(f"TP: {tp:,} | FP: {fp:,} | FN: {fn:,} | TN: {tn:,}")

# === ROC Curve ===
# Uses the softmax anomaly scores (not the hard predictions).
fpr, tpr, _ = roc_curve(y_test, y_probs)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.4f})', linewidth=3, color='#2E86AB')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier', alpha=0.7)
plt.xlabel('False Positive Rate', fontsize=14)
plt.ylabel('True Positive Rate', fontsize=14)
plt.title('Receiver Operating Characteristic (ROC) Curve (QR-DQN)', fontsize=16, pad=20)
plt.legend(loc='lower right', fontsize=12)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

print("‚úÖ QR-DQN Evaluation complete.")

In [None]:
# Evaluation for Non-Recurrent PPO on the test windows.
# NOTE(review): `model` is assumed to be the non-recurrent PPO model at this
# point; it is not loaded or created in this cell.
print("üìä Evaluating PPO...")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve
)
import torch

# Load the test windows and their window-level labels, as saved by the
# preprocessing cell.
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")  # binary: 0 = normal, 1 = anomaly

# Reuse make_obs_array function (same as training)
def make_obs_array(X_windows):
    """Convert windows to observation vectors (embedding + AE error score).

    Uses the module-level ``ae_model``, ``device``, ``ae_mean`` and
    ``ae_std``. The appended score is ``max(0, tanh((mse - ae_mean) /
    (ae_std + 1e-8)))`` where ``mse`` is the window's mean-squared
    reconstruction error. Returns a float32 array with one row per window.
    """
    collected = []
    with torch.no_grad():
        for win in X_windows:
            x_t = torch.tensor(win, dtype=torch.float32).unsqueeze(0).to(device)
            code = ae_model.encode(x_t).cpu().numpy().flatten()
            recon_loss = F.mse_loss(ae_model(x_t), x_t, reduction="mean").item()
            squashed = np.tanh((recon_loss - ae_mean) / (ae_std + 1e-8))
            error_feature = float(max(0.0, squashed))
            row = np.concatenate([code, [error_feature]]).astype(np.float32)
            collected.append(row)
    return np.array(collected)

print("Preparing test observations...")
obs_test = make_obs_array(X_test)

# === Inference for PPO ===
# NOTE(review): the standard-PPO training cell in this file wraps its
# environment in VecNormalize(norm_obs=True), but the observations here are
# passed to the model without that normalization. If `model` is that policy,
# confirm whether the saved VecNormalize statistics should be applied first.
y_pred = []
y_probs = []
deterministic = True  # greedy action instead of sampling

print("Running inference...")
for obs in obs_test:
    obs_reshaped = obs.reshape(1, -1)

    # Get deterministic action
    action, _ = model.predict(obs_reshaped, deterministic=deterministic)
    y_pred.append(int(action[0]))

    # Get probability of anomaly (action = 1) from the policy's action
    # distribution; used as the ROC score.
    with torch.no_grad():
        obs_tensor = torch.tensor(obs_reshaped, dtype=torch.float32).to(device)
        distribution = model.policy.get_distribution(obs_tensor)
        prob_anomaly = distribution.distribution.probs[0, 1].item()  # P(action=1)
        y_probs.append(prob_anomaly)

y_pred = np.array(y_pred)
y_probs = np.array(y_probs)

# === Metrics ===
# Pin the label order so the matrix is always 2x2 ([[TN, FP], [FN, TP]]), even
# when only one class shows up in y_test / y_pred; without it the 4-way unpack
# below fails on a 1x1 matrix.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)
# ROC AUC is undefined when y_test holds a single class (roc_auc_score raises
# ValueError); report NaN in that case instead of aborting the evaluation.
auc = roc_auc_score(y_test, y_probs) if len(np.unique(y_test)) > 1 else float("nan")

# === Confusion-matrix heatmap ===
# Label the rows/columns; this requires cm to be 2x2.
cm_df = pd.DataFrame(
    cm,
    index=['True Normal (0)', 'True Anomaly (1)'],
    columns=['Pred Normal (0)', 'Pred Anomaly (1)']
)

plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', linewidths=1, linecolor='black',
            cbar_kws={'label': 'Count'}, annot_kws={"size": 20, "weight": "bold"})
plt.title('Confusion Matrix (PPO)', fontsize=16, pad=20)
plt.ylabel('True Label', fontsize=14)
plt.xlabel('Predicted Label', fontsize=14)
plt.tight_layout()
plt.show()

# === Print Metrics ===
print(f"\nPrecision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-Score  : {f1:.4f}")
print(f"AUC       : {auc:.4f}")
print(f"TP: {tp:,} | FP: {fp:,} | FN: {fn:,} | TN: {tn:,}")

# === ROC Curve ===
# Uses the anomaly probabilities (not the hard predictions) as scores.
fpr, tpr, _ = roc_curve(y_test, y_probs)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.4f})', linewidth=3, color='#2E86AB')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier', alpha=0.7)
plt.xlabel('False Positive Rate', fontsize=14)
plt.ylabel('True Positive Rate', fontsize=14)
plt.title('Receiver Operating Characteristic (ROC) Curve (PPO)', fontsize=16, pad=20)
plt.legend(loc='lower right', fontsize=12)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

print("‚úÖ PPO Evaluation complete.")

In [None]:
# Evaluation for DQN on the test windows.
# NOTE(review): `model` is assumed to be the DQN model at this point; it is
# not loaded or created in this cell.
print("üìä Evaluating DQN...")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix, precision_score, recall_score, f1_score,
    roc_auc_score, roc_curve
)
import torch
import torch.nn.functional as F

# Load the test windows and their window-level labels, as saved by the
# preprocessing cell.
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")  # binary: 0 = normal, 1 = anomaly

# Reuse make_obs_array function
def make_obs_array(X_windows):
    """Create one observation per window for the evaluation loop.

    An observation is the flattened ``ae_model.encode`` output with a final
    element holding the reconstruction-error score: the mean-squared error
    of ``ae_model`` on the window, standardized by the module-level
    ``ae_mean`` / ``ae_std``, squashed with tanh and clamped below at 0.
    """
    out = []
    with torch.no_grad():
        for sample in X_windows:
            tensor_in = torch.tensor(sample, dtype=torch.float32).unsqueeze(0).to(device)
            features = ae_model.encode(tensor_in).cpu().numpy().flatten()
            rebuilt = ae_model(tensor_in)
            mse_value = F.mse_loss(rebuilt, tensor_in, reduction="mean").item()
            normalized = np.tanh((mse_value - ae_mean) / (ae_std + 1e-8))
            err_score = float(max(0.0, normalized))
            out.append(np.concatenate([features, [err_score]]).astype(np.float32))
    return np.array(out)

obs_test = make_obs_array(X_test)

# === Inference for DQN ===
# One window at a time: hard prediction from model.predict, plus a
# probability-like anomaly score derived from the Q-network.
y_pred = []
y_probs = []
deterministic = True

print("Running inference...")
for obs in obs_test:
    obs_reshaped = obs.reshape(1, -1)

    # Deterministic (greedy) action
    action, _ = model.predict(obs_reshaped, deterministic=deterministic)
    y_pred.append(int(action[0]))

    # Softmax over the Q-values gives a score for action 1 (anomaly). It is a
    # ranking score for the ROC curve, not a calibrated probability.
    with torch.no_grad():
        obs_tensor = torch.tensor(obs_reshaped, dtype=torch.float32).to(device)
        q_values = model.q_net(obs_tensor)  # expected shape (1, 2) — one Q-value per action
        probs = F.softmax(q_values, dim=1)
        prob_anomaly = probs[0, 1].item()
        y_probs.append(prob_anomaly)

y_pred = np.array(y_pred)
y_probs = np.array(y_probs)

# === Metrics ===
# Pin the label order so the matrix is always 2x2 ([[TN, FP], [FN, TP]]), even
# when only one class shows up in y_test / y_pred; without it the 4-way unpack
# below fails on a 1x1 matrix.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

precision = precision_score(y_test, y_pred, zero_division=0)
recall = recall_score(y_test, y_pred, zero_division=0)
f1 = f1_score(y_test, y_pred, zero_division=0)
# ROC AUC is undefined when y_test holds a single class (roc_auc_score raises
# ValueError); report NaN in that case instead of aborting the evaluation.
auc = roc_auc_score(y_test, y_probs) if len(np.unique(y_test)) > 1 else float("nan")

# === Confusion-matrix heatmap ===
# Label the rows/columns; this requires cm to be 2x2.
cm_df = pd.DataFrame(
    cm,
    index=['True Normal (0)', 'True Anomaly (1)'],
    columns=['Pred Normal (0)', 'Pred Anomaly (1)']
)

plt.figure(figsize=(8, 6))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', linewidths=1, linecolor='black',
            cbar_kws={'label': 'Count'}, annot_kws={"size": 20, "weight": "bold"})
plt.title('Confusion Matrix (DQN)', fontsize=16, pad=20)
plt.ylabel('True Label', fontsize=14)
plt.xlabel('Predicted Label', fontsize=14)
plt.tight_layout()
plt.show()

# === Print Metrics ===
print(f"\nPrecision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1-Score  : {f1:.4f}")
print(f"AUC       : {auc:.4f}")
print(f"TP: {tp:,} | FP: {fp:,} | FN: {fn:,} | TN: {tn:,}")

# === ROC Curve ===
# Uses the softmax anomaly scores (not the hard predictions).
fpr, tpr, _ = roc_curve(y_test, y_probs)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc:.4f})', linewidth=3, color='#2E86AB')
# Diagonal reference line.
plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier', alpha=0.7)
plt.xlabel('False Positive Rate', fontsize=14)
plt.ylabel('True Positive Rate', fontsize=14)
plt.title('Receiver Operating Characteristic (ROC) Curve (DQN)', fontsize=16, pad=20)
plt.legend(loc='lower right', fontsize=12)
plt.grid(alpha=0.3)
plt.tight_layout()
plt.show()

print("‚úÖ DQN Evaluation complete.")

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from collections import Counter

# Set random seed for reproducibility (global NumPy RNG; it drives the random
# choice of labeled anomalies further down).
np.random.seed(42)

# -----------------------------
# 1. Load dataset
# -----------------------------
print("üìÇ Loading dataset...")
df = pd.read_csv("EdgeIIoT-dataset.csv")

# Drop irrelevant identifier/payload columns (missing columns are ignored)
drop_cols = [
    "ip.src_host", "ip.dst_host", "arp.dst.proto_ipv4", "arp.src.proto_ipv4",
    "http.file_data", "http.request.uri.query", "http.referer",
    "http.request.full_uri", "tcp.options", "tcp.payload",
    "dns.qry.name", "dns.qry.name.len", "mqtt.msg"
]
df = df.drop(columns=drop_cols, errors="ignore")

# -----------------------------
# 2. Separate labels AND keep attack types
# -----------------------------
labels = df[["Attack_label", "Attack_type"]].copy()
attack_types_original = df["Attack_type"].values  # per-row attack type, in file order for now
df = df.drop(columns=["Attack_label", "Attack_type"], errors="ignore")

# -----------------------------
# 3. Encode categorical columns
# -----------------------------
# Object-typed columns other than frame.time: label-encode when there are
# fewer than 50 distinct values, otherwise replace each value by its count.
categorical_cols = df.select_dtypes(include=["object"]).columns.drop("frame.time", errors="ignore")
for col in categorical_cols:
    n_unique = df[col].nunique()
    if n_unique < 50:
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col].astype(str))
    else:
        freq = df[col].value_counts()
        df[col] = df[col].map(freq)

# -----------------------------
# 4. Scale numerical features
# -----------------------------
# NOTE(review): the scaler (and the encoders above) are fitted on the full
# dataset, i.e. before the train/test split further down — confirm that this
# is acceptable for the evaluation.
scaler = StandardScaler()
features = df.drop(columns=["frame.time"], errors="ignore")
features_scaled = scaler.fit_transform(features)

# Reassemble scaled features, labels and (if present) the timestamp column.
X = pd.DataFrame(features_scaled, columns=features.columns)
X["Attack_label"] = labels["Attack_label"].values
X["Attack_type"] = labels["Attack_type"].values
if "frame.time" in df.columns:
    X["frame.time"] = df["frame.time"].values

# Sort chronologically (important for sequential structure).
# NOTE(review): if frame.time is stored as strings, this sort is
# lexicographic — confirm that it matches true time order.
if "frame.time" in X.columns:
    X = X.sort_values("frame.time").reset_index(drop=True)
    # Re-read the attack types so they follow the sorted row order.
    attack_types_original = X["Attack_type"].values

print(f"‚úÖ Preprocessed features: {X.shape[1]} columns")

# -----------------------------
# 5. Sliding window (stride = 4) WITH ATTACK TYPE TRACKING
# -----------------------------
def sliding_window_with_attack_types(data, binary_labels, attack_types, 
                                      window=32, step=4, anomaly_threshold=0.4):
    """
    Create sliding windows with robust anomaly labeling AND track attack types.

    Every full window is produced, including the last one that ends exactly
    at the end of the data (start == len(data) - window).

    Args:
        data: Feature data (rows are time steps); a DataFrame or 2-D array.
        binary_labels: Binary labels (0=normal, 1=anomaly), one per row.
        attack_types: Attack type strings for each packet, sliceable by position.
        window: Window size (number of time steps)
        step: Stride between windows
        anomaly_threshold: Minimum proportion of anomalies needed (default: 0.4 = 40%)

    Returns:
        X_seq: Array of windows (n_windows, window, n_features), float32
        y_seq: Binary labels for windows (n_windows,), int8
        attack_type_seq: Most prevalent attack type per window (n_windows,)

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    # Convert once up front: slicing NumPy arrays per window is much cheaper
    # than repeated .iloc slicing and yields the same values.
    values = np.asarray(data)
    labels = np.asarray(binary_labels)

    X_seq, y_seq, attack_type_seq = [], [], []

    # "+ 1" so that the final full window is not dropped.
    for start in range(0, len(values) - window + 1, step):
        end = start + window
        X_seq.append(values[start:end])

        # Label as anomalous only if the share of anomalous rows >= threshold
        anomaly_ratio = labels[start:end].sum() / window
        y_seq.append(int(anomaly_ratio >= anomaly_threshold))

        # Most prevalent attack type in the window (ties: first encountered)
        most_common_attack = Counter(attack_types[start:end]).most_common(1)[0][0]
        attack_type_seq.append(most_common_attack)

    return (np.array(X_seq, dtype=np.float32),
            np.array(y_seq, dtype=np.int8),
            np.array(attack_type_seq))

print("ü™ü Generating sliding windows (window=32, step=4, ‚â•40% anomalies required)...")
# Features only (labels / attack type / timestamp removed); the labels and
# attack types are passed separately so they can be aggregated per window.
X_seq, y_seq, attack_type_seq = sliding_window_with_attack_types(
    X.drop(columns=["Attack_label", "Attack_type", "frame.time"], errors="ignore"),
    X["Attack_label"],
    attack_types_original,  # per-row attack types, same row order as X
    window=32,
    step=4,
    anomaly_threshold=0.4  # 40% threshold
)

print(f"‚úÖ Total windows: {len(X_seq):,}")
print(f"‚úÖ Window shape: {X_seq.shape[1:]} (time steps √ó features)")
print(f"‚úÖ Attack types tracked: {len(np.unique(attack_type_seq))} unique types")

# Show attack type distribution (dominant type per window)
print("\nüìä Attack type distribution in all windows:")
attack_counts = Counter(attack_type_seq)
for attack_type, count in attack_counts.most_common(10):  # Top 10
    pct = count / len(attack_type_seq) * 100
    print(f"  {attack_type:30s}: {count:8,} ({pct:5.2f}%)")
if len(attack_counts) > 10:
    print(f"  ... and {len(attack_counts) - 10} more attack types")

# -----------------------------
# 6. Chronological Train/Test Split (80/20, time-based)
# -----------------------------
# No shuffling: the first 80% of the windows form the training split, the
# last 20% the test split.
split_idx = int(0.8 * len(X_seq))  # First 80% train, last 20% test
X_train_seq = X_seq[:split_idx]
y_train_seq = y_seq[:split_idx]
attack_types_train = attack_type_seq[:split_idx]

X_test = X_seq[split_idx:]
y_test = y_seq[split_idx:]
attack_types_test = attack_type_seq[split_idx:]

# From train: Create ConvAE + PPO sets.
# The labeled subset is a random 5% sample of the *anomalous* training
# windows only, so it contains no normal windows.
anomaly_indices = np.where(y_train_seq == 1)[0]
num_labeled = int(0.05 * len(anomaly_indices))  # 5% labeled anomalies

labeled_anom_idx = np.random.choice(anomaly_indices, num_labeled, replace=False)
X_labeled = X_train_seq[labeled_anom_idx]
y_labeled = y_train_seq[labeled_anom_idx]

# Everything else in the training split (sorted indices, via setdiff1d).
unlabeled_idx = np.setdiff1d(np.arange(len(X_train_seq)), labeled_anom_idx)
X_unlabeled = X_train_seq[unlabeled_idx]
y_unlabeled = y_train_seq[unlabeled_idx]  # For sanity checks only; hide in training

# ConvAE trained only on normal sequences from train
normal_indices = np.where(y_train_seq == 0)[0]
X_ae = X_train_seq[normal_indices]

# -----------------------------
# 7. Save outputs WITH INDICES AND ATTACK TYPES
# -----------------------------
np.save("X_ae.npy", X_ae)
np.save("X_unlabeled.npy", X_unlabeled)
np.save("y_unlabeled.npy", y_unlabeled)
np.save("X_labeled.npy", X_labeled)
np.save("y_labeled.npy", y_labeled)
np.save("X_test.npy", X_test)
np.save("y_test.npy", y_test)
np.save("y_train_seq.npy", y_train_seq)

# Dominant attack type per window, for per-attack evaluation
np.save("attack_types_test.npy", attack_types_test)
np.save("attack_types_train.npy", attack_types_train)

# Save indices for reconstruction of the chronological training split.
# The split size is stored as a one-element array.
np.save("labeled_indices.npy", labeled_anom_idx)
np.save("unlabeled_indices.npy", unlabeled_idx)
np.save("train_split_size.npy", np.array([len(X_train_seq)]))

print("\nüíæ Saved arrays:")
print(f"  X_ae         : {X_ae.shape} ({X_ae.nbytes / 1e9:.2f} GB)")
print(f"  X_unlabeled  : {X_unlabeled.shape} ({X_unlabeled.nbytes / 1e9:.2f} GB)")
print(f"  X_labeled    : {X_labeled.shape} ({X_labeled.nbytes / 1e9:.2f} GB)")
print(f"  X_test       : {X_test.shape} ({X_test.nbytes / 1e9:.2f} GB)")
print(f"\n  attack_types_test  : {attack_types_test.shape}")
print(f"  attack_types_train : {attack_types_train.shape}")
print(f"  labeled_indices    : {labeled_anom_idx.shape}")
print(f"  unlabeled_indices  : {unlabeled_idx.shape}")

# -----------------------------
# 8. Dataset Statistics
# -----------------------------
# Reporting only: nothing below modifies the arrays or the saved files.
print("\n" + "="*60)
print("üìä DATASET STATISTICS")
print("="*60)

# Training split statistics
train_normal = np.sum(y_train_seq == 0)
train_anomaly = np.sum(y_train_seq == 1)
train_total = len(y_train_seq)
print(f"\nüîπ TRAINING SPLIT (First 80% chronologically):")
print(f"  Total windows    : {train_total:,}")
print(f"  Normal windows   : {train_normal:,} ({train_normal/train_total*100:.2f}%)")
print(f"  Anomaly windows  : {train_anomaly:,} ({train_anomaly/train_total*100:.2f}%)")

# Test split statistics
test_normal = np.sum(y_test == 0)
test_anomaly = np.sum(y_test == 1)
test_total = len(y_test)
print(f"\nüîπ TEST SPLIT (Last 20% chronologically):")
print(f"  Total windows    : {test_total:,}")
print(f"  Normal windows   : {test_normal:,} ({test_normal/test_total*100:.2f}%)")
print(f"  Anomaly windows  : {test_anomaly:,} ({test_anomaly/test_total*100:.2f}%)")

# Dominant attack type per test window, most frequent first
print(f"\nüîπ TEST SET ATTACK TYPES:")
test_attack_counts = Counter(attack_types_test)
for attack_type, count in test_attack_counts.most_common():
    pct = count / len(attack_types_test) * 100
    print(f"  {attack_type:30s}: {count:6,} ({pct:5.2f}%)")

# Overall statistics
print(f"\nüîπ OVERALL:")
print(f"  Total windows    : {train_total + test_total:,}")
print(f"  Normal windows   : {train_normal + test_normal:,}")
print(f"  Anomaly windows  : {train_anomaly + test_anomaly:,}")
print(f"  Unique attacks   : {len(np.unique(attack_type_seq))}")

# Labeled data info.
# NOTE(review): the percentage below divides by train_anomaly and therefore
# assumes the training split contains at least one anomalous window.
print(f"\nüîπ LABELED ANOMALIES (for PPO training):")
print(f"  Labeled anomalies: {len(X_labeled):,} ({len(X_labeled)/train_anomaly*100:.2f}% of train anomalies)")
print(f"  Unlabeled data   : {len(X_unlabeled):,}")
print(f"  ConvAE data      : {len(X_ae):,} (normal sequences only)")

print("\n" + "="*60)
print(f"‚úÖ Preprocessing complete! Ready for ConvAE and PPO training.")
print(f"‚úÖ Index files saved for reconstruction: labeled_indices.npy, unlabeled_indices.npy")
print(f"‚úÖ Attack type information saved: attack_types_test.npy, attack_types_train.npy")

In [None]:
"""
Multi-Model Comparison Across Attack Types
Evaluates DQN, QR-DQN, PPO, and Recurrent PPO on each attack type
"""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score
import torch
import torch.nn.functional as F
from typing import NamedTuple, Tuple
from torch import Tensor

print("="*80)
print("üî¨ MULTI-MODEL ATTACK-TYPE COMPARISON")
print("="*80)

# ============================================================================
# STEP 1: Load test data and attack types
# ============================================================================

# Test windows, their binary labels, and the dominant attack type of each
# window, as saved by the preprocessing cell.
print("\nüìÇ Loading test data...")
X_test = np.load("X_test.npy")
y_test = np.load("y_test.npy")
attack_types_test = np.load("attack_types_test.npy")

print(f"‚úÖ Test samples: {len(y_test):,}")
print(f"‚úÖ Attack types: {len(np.unique(attack_types_test))}")


# ============================================================================
# STEP 2: Define helper functions
# ============================================================================

class LSTMStates(NamedTuple):
    """Bundle of the two recurrent states carried between policy steps.

    Each field is a pair of tensors; presumably the (hidden, cell) state of
    one LSTM -- TODO confirm against the policy implementation.
    """

    # State pair for the policy/actor side ("pi").
    pi: Tuple[Tensor, Tensor]
    # State pair for the value-function/critic side ("vf").
    vf: Tuple[Tensor, Tensor]


def make_obs_array(X_windows, ae_model, ae_mean, ae_std, device, batch_size=256):
    """Create the observation matrix shared by all models.

    Each observation is the flattened autoencoder embedding of one window
    followed by a single anomaly score. The score is the window's mean squared
    reconstruction error, standardised with ``ae_mean``/``ae_std``, squashed
    with ``tanh`` and clipped at zero, so only above-average errors register.

    Windows are pushed through the autoencoder in mini-batches instead of one
    at a time. This removes a forward pass and a host/device sync per window;
    results match the per-window computation up to floating-point rounding.

    Args:
        X_windows: Equally shaped windows supporting ``len`` and slicing
            (e.g. an array with the window index on axis 0, or a list).
        ae_model: Autoencoder exposing ``encode(batch)`` and ``__call__(batch)``
            (reconstruction). It is used as-is; the caller puts it in eval mode.
        ae_mean: Scalar mean of the reconstruction error used for scaling.
        ae_std: Scalar standard deviation of the reconstruction error.
        device: Torch device on which the autoencoder lives.
        batch_size: Number of windows per forward pass (must be >= 1).

    Returns:
        ``float32`` array of shape ``(n_windows, embedding_size + 1)``. Empty
        input yields an empty ``(0, 0)`` array.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    n_windows = len(X_windows)
    if n_windows == 0:
        # Nothing to encode; the embedding width is unknown without a pass.
        return np.empty((0, 0), dtype=np.float32)

    mean = float(ae_mean)
    scale = float(ae_std) + 1e-8  # epsilon guards against a zero std
    chunks = []

    with torch.no_grad():
        for start in range(0, n_windows, batch_size):
            batch = np.asarray(X_windows[start:start + batch_size], dtype=np.float32)
            t = torch.as_tensor(batch, dtype=torch.float32).to(device)
            n_batch = t.shape[0]

            emb = ae_model.encode(t).reshape(n_batch, -1).cpu().numpy()
            recon = ae_model(t)
            # Per-window MSE: average the squared error over all non-batch axes.
            mse = (
                F.mse_loss(recon, t, reduction="none")
                .reshape(n_batch, -1)
                .mean(dim=1)
                .cpu()
                .numpy()
                .astype(np.float64)
            )

            err = np.maximum(0.0, np.tanh((mse - mean) / scale))
            chunks.append(
                np.concatenate([emb, err[:, None]], axis=1).astype(np.float32)
            )

    return np.concatenate(chunks, axis=0)


def evaluate_recurrent_ppo(model, obs_test, device):
    """Evaluate a Recurrent PPO model over a chronological observation sequence.

    The observations are treated as one continuous episode: the recurrent
    state returned by each prediction is fed into the next one, and only the
    very first step is flagged as an episode start.

    Prediction goes through the public ``model.predict`` API rather than the
    policy's private LSTM helpers. That keeps the function independent of
    policy internals and skips a critic pass that has no influence on the
    chosen action.

    Args:
        model: Trained recurrent model exposing
            ``predict(obs, state=..., episode_start=..., deterministic=...)``
            and returning ``(action, next_state)``.
        obs_test: Iterable of observations, one per time step.
        device: Unused; kept for signature compatibility with the other
            ``evaluate_*`` helpers (``predict`` handles device placement).

    Returns:
        Array with one integer action per observation.
    """
    print("  üîÑ Evaluating Recurrent PPO...")
    y_pred = []
    # ``None`` lets ``predict`` create its own initial recurrent state.
    lstm_states = None
    episode_starts = np.ones((1,), dtype=bool)

    for obs in obs_test:
        action, lstm_states = model.predict(
            np.asarray(obs, dtype=np.float32).reshape(1, -1),
            state=lstm_states,
            episode_start=episode_starts,
            deterministic=True,
        )
        y_pred.append(int(np.asarray(action).reshape(-1)[0]))
        # Every later step continues the same episode.
        episode_starts = np.zeros((1,), dtype=bool)

    return np.array(y_pred)


def evaluate_standard_ppo(model, obs_test, device, batch_size=4096):
    """Evaluate a (non-recurrent) Standard PPO model.

    The policy is stateless, so observations are predicted in batches rather
    than one row at a time. The previous numpy -> device tensor -> CPU numpy
    round trip per observation is gone: ``predict`` receives NumPy input
    directly.

    Args:
        model: Trained model exposing ``predict(obs, deterministic=...)`` and
            returning ``(actions, state)``.
        obs_test: Sequence of observations; each is flattened to one row.
        device: Unused; kept for signature compatibility with the other
            ``evaluate_*`` helpers.
        batch_size: Number of observations per ``predict`` call (>= 1).

    Returns:
        Array with one integer action per observation (empty for no input).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    print("  üîÑ Evaluating Standard PPO...")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    n_obs = len(obs_test)
    if n_obs == 0:
        return np.array([])

    flat_obs = np.asarray(obs_test, dtype=np.float32).reshape(n_obs, -1)
    y_pred = []

    for start in range(0, n_obs, batch_size):
        actions, _ = model.predict(flat_obs[start:start + batch_size], deterministic=True)
        y_pred.extend(int(a) for a in np.asarray(actions).reshape(-1))

    return np.array(y_pred)


def evaluate_dqn(model, obs_test, device, batch_size=4096):
    """Evaluate a DQN model.

    The Q-network is stateless, so observations are predicted in batches
    instead of issuing one ``predict`` call per row.

    Args:
        model: Trained model exposing ``predict(obs, deterministic=...)`` and
            returning ``(actions, state)``.
        obs_test: Sequence of observations; each is flattened to one row.
        device: Unused; kept for signature compatibility with the other
            ``evaluate_*`` helpers.
        batch_size: Number of observations per ``predict`` call (>= 1).

    Returns:
        Array with one integer action per observation (empty for no input).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    print("  üîÑ Evaluating DQN...")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    n_obs = len(obs_test)
    if n_obs == 0:
        return np.array([])

    flat_obs = np.asarray(obs_test).reshape(n_obs, -1)
    y_pred = []

    for start in range(0, n_obs, batch_size):
        actions, _ = model.predict(flat_obs[start:start + batch_size], deterministic=True)
        y_pred.extend(int(a) for a in np.asarray(actions).reshape(-1))

    return np.array(y_pred)


def evaluate_qrdqn(model, obs_test, device, batch_size=4096):
    """Evaluate a QR-DQN model.

    The network is stateless, so observations are predicted in batches
    instead of issuing one ``predict`` call per row.

    Args:
        model: Trained model exposing ``predict(obs, deterministic=...)`` and
            returning ``(actions, state)``.
        obs_test: Sequence of observations; each is flattened to one row.
        device: Unused; kept for signature compatibility with the other
            ``evaluate_*`` helpers.
        batch_size: Number of observations per ``predict`` call (>= 1).

    Returns:
        Array with one integer action per observation (empty for no input).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    print("  üîÑ Evaluating QR-DQN...")
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    n_obs = len(obs_test)
    if n_obs == 0:
        return np.array([])

    flat_obs = np.asarray(obs_test).reshape(n_obs, -1)
    y_pred = []

    for start in range(0, n_obs, batch_size):
        actions, _ = model.predict(flat_obs[start:start + batch_size], deterministic=True)
        y_pred.extend(int(a) for a in np.asarray(actions).reshape(-1))

    return np.array(y_pred)


# ============================================================================
# STEP 3: Load all models
# ============================================================================

print("\nüì¶ Loading models and preprocessing components...")

device = "cuda" if torch.cuda.is_available() else "cpu"

# RL algorithm classes needed to restore the saved agents.
from stable_baselines3 import PPO, DQN
from sb3_contrib import QRDQN, RecurrentPPO

# Load ConvAE (shared by all models)
print("  Loading ConvAE...")
ae_model = ConvAE(feat_dim=X_test.shape[2], seq_len=X_test.shape[1], latent_dim=128)
ae_model.load_state_dict(torch.load("convAE_best.pth", map_location=device))
ae_model.to(device)
ae_model.eval()

# Reconstruction-error statistics used to scale the anomaly score.
stats = np.load("convAE_stats.npz")
ae_mean, ae_std = stats["mean_err"], stats["std_err"]

# Prepare observations (shared)
print("  Creating observations...")
obs_test = make_obs_array(X_test, ae_model, ae_mean, ae_std, device)

# Load all trained models. Loading stays best-effort (a missing checkpoint
# must not abort the comparison), but only ``Exception`` is caught so Ctrl-C
# still works, and the actual cause is printed instead of always claiming
# the file is missing.
models = {}

for algo_name, algo_cls, checkpoint in (
    ("DQN", DQN, "dqn_ae_chronological.zip"),
    ("QR-DQN", QRDQN, "qrdqn_ae_chronological.zip"),
    ("PPO", PPO, "standard_ppo_ae_chronological.zip"),
    ("Recurrent PPO", RecurrentPPO, "recurrent_ppo_ae_chronological.zip"),
):
    print(f"  Loading {algo_name}...")
    try:
        models[algo_name] = algo_cls.load(checkpoint, device=device)
    except Exception as exc:
        print(f"    ‚ö†Ô∏è  {algo_name} model not found")
        print(f"       reason: {type(exc).__name__}: {exc}")

print(f"\n‚úÖ Loaded {len(models)} models: {list(models.keys())}")


# ============================================================================
# STEP 4: Evaluate all models
# ============================================================================

print("\n" + "=" * 80)
print("üß™ RUNNING EVALUATIONS")
print("=" * 80)

# Map each model label to the helper that knows how to run it.
_EVALUATORS = {
    "Recurrent PPO": evaluate_recurrent_ppo,
    "PPO": evaluate_standard_ppo,
    "DQN": evaluate_dqn,
    "QR-DQN": evaluate_qrdqn,
}

predictions = {}

for model_name, model in models.items():
    print(f"\n{model_name}:")

    run_evaluation = _EVALUATORS.get(model_name)
    if run_evaluation is not None:
        predictions[model_name] = run_evaluation(model, obs_test, device)

    print(f"    ‚úÖ Predictions: {len(predictions[model_name]):,}")


# ============================================================================
# STEP 5: Calculate per-attack metrics for all models
# ============================================================================

print("\n" + "="*80)
print("üìä CALCULATING PER-ATTACK METRICS")
print("="*80)

# Get unique attack types and report how many test windows each one covers.
unique_attacks = sorted(np.unique(attack_types_test))
print(f"\nFound {len(unique_attacks)} attack types:")
for attack in unique_attacks:
    count = (attack_types_test == attack).sum()
    pct = count / len(attack_types_test) * 100
    print(f"  ‚Ä¢ {attack:30s}: {count:6,} ({pct:5.2f}%)")

# Build comprehensive results table: one row per (attack type, model) pair.
results_data = []

for attack_type in unique_attacks:
    # Restrict labels (and, below, predictions) to windows of this attack type.
    mask = attack_types_test == attack_type
    y_true_attack = y_test[mask]
    n_samples = len(y_true_attack)
    pct_total = n_samples / len(y_test) * 100

    for model_name, y_pred_all in predictions.items():
        y_pred_attack = y_pred_all[mask]

        # zero_division=0 reports 0 instead of warning when a subset has no
        # positive labels or no positive predictions.
        precision = precision_score(y_true_attack, y_pred_attack, zero_division=0)
        recall = recall_score(y_true_attack, y_pred_attack, zero_division=0)
        f1 = f1_score(y_true_attack, y_pred_attack, zero_division=0)

        results_data.append({
            'AttackType': attack_type,
            'Count': n_samples,
            '% of Total': pct_total,
            'Model': model_name,
            'Precision': precision,
            'Recall': recall,
            'F1-Score': f1
        })

# Create DataFrame. The columns are spelled out so that the frame still has
# the expected schema when no model could be evaluated; without them an empty
# frame has no columns and later lookups such as results_df['AttackType']
# fail with KeyError.
results_df = pd.DataFrame(
    results_data,
    columns=[
        'AttackType',
        'Count',
        '% of Total',
        'Model',
        'Precision',
        'Recall',
        'F1-Score',
    ],
)


# ============================================================================
# STEP 6: Format and display the per-attack results table
# ============================================================================

print("\n" + "=" * 80)
print("üìã RESULTS TABLE (Formatted)")
print("=" * 80 + "\n")

# One section per attack type: a header line with the sample count and its
# share of the test set, followed by one metrics line per evaluated model.
for attack_type in unique_attacks:
    attack_data = results_df.loc[results_df['AttackType'] == attack_type]

    if attack_data.empty:
        continue

    # Count and percentage are the same on every row of the group, so they
    # are read from the first row.
    header_row = attack_data.iloc[0]
    count, pct = header_row['Count'], header_row['% of Total']

    print(f"{attack_type:<20} {count:>10,} {pct:>10.2f}%")

    # Metric lines are indented past the three header columns.
    for _, row in attack_data.iterrows():
        metric_cells = " ".join(
            f"{row[metric_name]:>10.4f}"
            for metric_name in ('Precision', 'Recall', 'F1-Score')
        )
        print(f"{'':20} {'':10} {'':10}   {row['Model']:<15} {metric_cells}")

    print()  # Blank line between attack types


# ============================================================================
# STEP 7: Save to CSV and Excel (for easy copy-paste)
# ============================================================================

print("="*80)
print("üíæ SAVING RESULTS")
print("="*80)

# Save detailed results (one row per attack type and model).
results_df.to_csv('all_models_per_attack_comparison.csv', index=False)
print("‚úÖ Saved: all_models_per_attack_comparison.csv")

# Create pivot table for easier viewing: attack types as rows, one column
# per (metric, model) combination.
pivot_df = results_df.pivot_table(
    index=['AttackType', 'Count', '% of Total'],
    columns='Model',
    values=['Precision', 'Recall', 'F1-Score']
)
pivot_df.to_csv('all_models_pivot_table.csv')
print("‚úÖ Saved: all_models_pivot_table.csv")

# Create the presentation table: attack type, count and share appear only on
# the first row of each group; the remaining rows leave those cells blank.
formatted_rows = []
for attack_type in unique_attacks:
    attack_data = results_df[results_df['AttackType'] == attack_type]

    if attack_data.empty:
        continue

    count = int(attack_data.iloc[0]['Count'])
    pct = attack_data.iloc[0]['% of Total']

    for position, (_, row) in enumerate(attack_data.iterrows()):
        is_first = position == 0
        formatted_rows.append({
            'AttackType': attack_type if is_first else '',
            'Count': count if is_first else '',
            '% of Total': f"{pct:.2f}%" if is_first else '',
            'Model': row['Model'],
            'Precision': f"{row['Precision']:.4f}",
            'Recall': f"{row['Recall']:.4f}",
            'F1-Score': f"{row['F1-Score']:.4f}"
        })

formatted_df = pd.DataFrame(formatted_rows)
formatted_df.to_csv('formatted_comparison_table.csv', index=False)
print("‚úÖ Saved: formatted_comparison_table.csv (ready for copy-paste!)")

# Also save as Excel for better formatting. The export stays best-effort, but
# a missing Excel engine is told apart from other failures (e.g. a locked or
# unwritable file) so the openpyxl hint is only shown when it applies.
try:
    formatted_df.to_excel('formatted_comparison_table.xlsx', index=False)
    print("‚úÖ Saved: formatted_comparison_table.xlsx")
except ImportError:
    print("‚ö†Ô∏è  Excel save failed (install openpyxl: pip install openpyxl)")
except Exception as exc:
    print(f"Excel save failed ({type(exc).__name__}: {exc})")


# ============================================================================
# STEP 8: Visualization - Heatmap comparison
# ============================================================================

print("\n" + "=" * 80)
print("üìä CREATING VISUALIZATIONS")
print("=" * 80)

# --- F1-Score heatmap: attack types as rows, models as columns --------------
pivot_f1 = results_df.pivot_table(index='AttackType', columns='Model', values='F1-Score')

# Height grows with the number of attack types but never drops below 8 inches.
heatmap_height = max(8, len(unique_attacks) * 0.5)
heatmap_fig, heatmap_ax = plt.subplots(figsize=(12, heatmap_height))
sns.heatmap(
    pivot_f1,
    annot=True,
    fmt='.4f',
    cmap='RdYlGn',
    vmin=0,
    vmax=1,
    linewidths=0.5,
    cbar_kws={'label': 'F1-Score'},
    ax=heatmap_ax,
)
heatmap_ax.set_title('F1-Score Comparison Across Models and Attack Types', fontsize=14, pad=15)
heatmap_ax.set_xlabel('Model', fontsize=12)
heatmap_ax.set_ylabel('Attack Type', fontsize=12)
heatmap_fig.tight_layout()
heatmap_fig.savefig('f1_score_heatmap_all_models.png', dpi=300, bbox_inches='tight')
print("‚úÖ Saved: f1_score_heatmap_all_models.png")
plt.show()

# --- Grouped bar charts: one stacked panel per metric -----------------------
metrics = ['Precision', 'Recall', 'F1-Score']
fig, axes = plt.subplots(3, 1, figsize=(14, 12))

for ax, metric in zip(axes, metrics):
    pivot_metric = results_df.pivot_table(index='AttackType', columns='Model', values=metric)

    pivot_metric.plot(kind='bar', ax=ax, width=0.8)
    ax.set_title(f'{metric} by Attack Type', fontsize=13, pad=10)
    ax.set_xlabel('Attack Type', fontsize=11)
    ax.set_ylabel(metric, fontsize=11)
    ax.legend(title='Model', fontsize=9)
    ax.grid(axis='y', alpha=0.3)
    # Slant the attack-type labels so long names do not overlap.
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

fig.tight_layout()
fig.savefig('metrics_comparison_all_models.png', dpi=300, bbox_inches='tight')
print("‚úÖ Saved: metrics_comparison_all_models.png")
plt.show()


# ============================================================================
# STEP 9: Summary statistics
# ============================================================================

print("\n" + "=" * 80)
print("üìà SUMMARY STATISTICS")
print("=" * 80)

# Per model: mean/std of the per-attack F1 scores plus the attack types with
# the highest and lowest F1.
for model_name in predictions:
    model_data = results_df[results_df['Model'] == model_name]
    f1_scores = model_data['F1-Score']

    print(f"\n{model_name}:")
    print(f"  Mean F1-Score: {f1_scores.mean():.4f} (¬±{f1_scores.std():.4f})")
    print(f"  Best Attack:   {model_data.loc[f1_scores.idxmax(), 'AttackType']} "
          f"(F1={f1_scores.max():.4f})")
    print(f"  Worst Attack:  {model_data.loc[f1_scores.idxmin(), 'AttackType']} "
          f"(F1={f1_scores.min():.4f})")

print("\n" + "=" * 80)
print("‚úÖ COMPARISON COMPLETE!")
print("=" * 80)

# Closing list of the files the previous steps write.
print("\n".join((
    "\nGenerated files:",
    "  üìÑ all_models_per_attack_comparison.csv",
    "  üìÑ formatted_comparison_table.csv (‚Üê USE THIS for tables!)",
    "  üìÑ formatted_comparison_table.xlsx",
    "  üìä f1_score_heatmap_all_models.png",
    "  üìä metrics_comparison_all_models.png",
)))