# 🚀 Deep Q-Learning for Anomaly Detection (with Real Kubernetes Dataset)

This notebook applies Deep Q-Learning to the Kubernetes metrics dataset `k8_synthetic_dataset.csv`, aiming to improve anomaly detection using:
- Reward tuning
- Feature normalization
- Class imbalance handling


In [1]:
import random
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
# Read the Kubernetes metrics table from disk
df = pd.read_csv("../data/raw/k8_synthetic_dataset.csv")

# Quick preview of the first rows
print(df.head())

# Columns fed to the model, and the column holding the class label
features = ["cpu_usage", "memory_usage", "network_io", "disk_io"]
label_col = "label"  # change this if the label column has a different name

# Target vector
y = df[label_col].values

# Standardize every feature column to zero mean / unit variance.
# NOTE(review): the scaler is fit on all rows here, while the train/test
# split happens in a later cell, so test-set statistics influence the
# scaling. Consider fitting on the training split only.
scaler = StandardScaler()
X = scaler.fit_transform(df[features].values)

# Report how many samples fall into each class
unique, counts = np.unique(y, return_counts=True)
print("Class distribution:", {cls: n for cls, n in zip(unique, counts)})


   cpu_usage  memory_usage  network_io     disk_io  label
0  54.967142     41.710050  337.849431  107.373466    0.0
1  48.617357     44.398190  253.891734   92.133224    0.0
2  56.476885     57.472936  343.480296  100.574896    0.0
3  65.230299     56.103703  367.781893  125.569037    0.0
4  47.658466     49.790984  320.671745  103.821981    0.0
Class distribution: {0.0: 285, 1.0: 15}


In [3]:
# Hold out 20% of the samples for evaluation. Stratifying on y keeps the
# class ratio the same in both splits; the fixed random_state makes the
# split repeatable. (`train_test_split` is imported in the import cell.)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Sanity checks: no sample lost, features and labels stay aligned.
assert len(X_train) + len(X_test) == len(X), "split lost or duplicated samples"
assert len(X_train) == len(y_train) and len(X_test) == len(y_test), "X/y misaligned"

print(f"Train samples: {len(X_train)}, Test samples: {len(X_test)}")


Train samples: 240, Test samples: 60


In [4]:
class QNetwork(nn.Module):
    """Small feed-forward network that maps a state vector to Q-values.

    Architecture: Linear(input_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, output_dim).

    Args:
        input_dim: Number of features in one state vector.
        output_dim: Number of actions; one Q-value is produced per action.
        hidden_dim: Width of the single hidden layer. Defaults to 64, the
            value that was previously hard-coded.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=64):
        super().__init__()
        # The attribute name `fc` is kept so state_dict keys stay unchanged.
        self.fc = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )

    def forward(self, x):
        """Return the Q-values for `x`; the last dimension must be `input_dim`."""
        return self.fc(x)


In [5]:
class RealDeepQLWrapper:
    """Deep Q-Learning agent that casts binary classification as an RL task.

    Each sample is a state and the two actions are the predicted labels
    (0 or 1). The reward depends on whether the prediction was a true/false
    positive/negative, so misclassifying the positive class can be penalised
    more heavily than misclassifying the negative class.

    Args:
        input_dim: Number of features per sample.
        episodes: Number of full passes over the training data in `fit`.
        epsilon: Initial exploration rate for epsilon-greedy action selection.
        epsilon_min: Lower bound for epsilon.
        epsilon_decay: Multiplicative decay applied to epsilon after each episode.
        gamma: Discount factor applied to the bootstrapped next-state value.
    """

    def __init__(self, input_dim, episodes=100, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.99, gamma=0.95):
        self.episodes = episodes
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.gamma = gamma
        # Replay buffer of (state, action, reward, next_state) transitions.
        self.memory = deque(maxlen=10000)
        self.model = QNetwork(input_dim, 2)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.loss_fn = nn.MSELoss()

    @staticmethod
    def _reward(label, predicted_label):
        """Return the reward for predicting `predicted_label` when the truth is `label`."""
        if label == 1 and predicted_label == 1:
            return 1.0  # TP
        elif label == 1 and predicted_label == 0:
            return -1.0  # FN
        elif label == 0 and predicted_label == 1:
            return -0.5  # FP
        else:
            return 0.1  # TN

    def _greedy_actions(self, states):
        """Return the argmax-Q action for each row of `states` as a 1-D tensor."""
        batch = torch.as_tensor(np.asarray(states), dtype=torch.float32)
        with torch.no_grad():
            return self.model(batch).argmax(dim=1)

    def remember(self, state, action, reward, next_state):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state))

    def act(self, state):
        """Choose an action for `state` with epsilon-greedy exploration.

        With probability `epsilon` a random action is returned; otherwise the
        action with the highest predicted Q-value. Intended for training
        only; `predict` is always greedy.
        """
        if np.random.rand() <= self.epsilon:
            return np.random.randint(2)
        return int(self._greedy_actions(np.asarray(state)[None, ...])[0])

    def replay(self, batch_size=32):
        """Run one gradient step on a random minibatch from the replay buffer.

        Does nothing until the buffer holds at least `batch_size` transitions.
        The whole minibatch is processed in a single forward/backward pass,
        and the bootstrap target is computed without gradient tracking so
        that only the Q-value of the taken action is pushed towards the
        target (the target itself is treated as a constant).
        """
        if batch_size <= 0 or len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states = zip(*minibatch)

        states = torch.as_tensor(np.asarray(states), dtype=torch.float32)
        next_states = torch.as_tensor(np.asarray(next_states), dtype=torch.float32)
        actions = torch.as_tensor(actions, dtype=torch.int64).unsqueeze(1)
        rewards = torch.as_tensor(rewards, dtype=torch.float32)

        # TD target: r + gamma * max_a' Q(s', a'), held constant for the update.
        with torch.no_grad():
            targets = rewards + self.gamma * self.model(next_states).max(dim=1).values

        # Q-value the network currently assigns to the action that was taken.
        current = self.model(states).gather(1, actions).squeeze(1)

        loss = self.loss_fn(current, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def fit(self, X, y, verbose=True):
        """Train the agent on samples `X` with binary labels `y`.

        For every episode the samples are visited in order: the agent picks
        an action (epsilon-greedy), receives the reward for that prediction,
        stores the transition and performs one replay step. Epsilon is
        decayed once per episode.

        Args:
            X: Indexable collection of feature vectors.
            y: Labels aligned with `X` (1 = positive class, 0 = negative class).
            verbose: When True (default), print the epsilon value after each episode.
        """
        n_samples = len(X)
        for e in range(self.episodes):
            for i in range(n_samples):
                state = X[i]
                action = self.act(state)
                reward = self._reward(y[i], action)

                # The "next state" is simply the following sample; the last
                # sample points to itself.
                next_state = X[min(i + 1, n_samples - 1)]
                self.remember(state, action, reward, next_state)
                self.replay()

            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
            if verbose:
                print(f"Episode {e+1}/{self.episodes}, Epsilon: {self.epsilon:.4f}")

    def predict(self, X):
        """Return the greedy (argmax-Q) label for every row of `X`.

        Exploration is deliberately not applied here: with the default decay,
        epsilon is still about 0.37 after 100 episodes, so routing inference
        through `act` would replace roughly a third of the predictions with
        random guesses.

        Returns:
            1-D integer numpy array with one predicted label per sample.
        """
        if len(X) == 0:
            return np.array([], dtype=np.int64)
        return self._greedy_actions(X).cpu().numpy()


In [6]:
# Seed every RNG the agent draws from so that Restart & Run All reproduces
# the same result: `random` (replay sampling), NumPy (epsilon-greedy
# exploration) and PyTorch (network weight initialisation).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Train on the training split, then label the held-out test split.
dql = RealDeepQLWrapper(input_dim=X.shape[1], episodes=100)
dql.fit(X_train, y_train)
y_pred = dql.predict(X_test)

print("\n📊 Final Deep Q-Learning Performance:\n")
# zero_division=0 reports 0.0 (instead of emitting a warning) for a class
# that the agent never predicts.
print(classification_report(y_test, y_pred, target_names=["Normal", "Anomaly"], zero_division=0))


Episode 1/100, Epsilon: 0.9900
Episode 2/100, Epsilon: 0.9801
Episode 3/100, Epsilon: 0.9703
Episode 4/100, Epsilon: 0.9606
Episode 5/100, Epsilon: 0.9510
Episode 6/100, Epsilon: 0.9415
Episode 7/100, Epsilon: 0.9321
Episode 8/100, Epsilon: 0.9227
Episode 9/100, Epsilon: 0.9135
Episode 10/100, Epsilon: 0.9044
Episode 11/100, Epsilon: 0.8953
Episode 12/100, Epsilon: 0.8864
Episode 13/100, Epsilon: 0.8775
Episode 14/100, Epsilon: 0.8687
Episode 15/100, Epsilon: 0.8601
Episode 16/100, Epsilon: 0.8515
Episode 17/100, Epsilon: 0.8429
Episode 18/100, Epsilon: 0.8345
Episode 19/100, Epsilon: 0.8262
Episode 20/100, Epsilon: 0.8179
Episode 21/100, Epsilon: 0.8097
Episode 22/100, Epsilon: 0.8016
Episode 23/100, Epsilon: 0.7936
Episode 24/100, Epsilon: 0.7857
Episode 25/100, Epsilon: 0.7778
Episode 26/100, Epsilon: 0.7700
Episode 27/100, Epsilon: 0.7623
Episode 28/100, Epsilon: 0.7547
Episode 29/100, Epsilon: 0.7472
Episode 30/100, Epsilon: 0.7397
Episode 31/100, Epsilon: 0.7323
Episode 32/100, E