In [1]:
"""
Project: Deep Q-Learning Based Anomaly Detection for Kubernetes Environment
Author: Ammar Yousuf Abrahani
Date: June 2025

Description:
This script implements a Deep Q-Learning model to detect anomalies in Kubernetes resource usage data.
The dataset used is synthetically generated to simulate realistic CPU, Memory, Network, and Disk usage patterns.

Adapted from: Concepts inspired by:
- https://github.com/gcamfer/Anomaly-ReactionRL
- https://www.tensorflow.org/agents
- Custom implementation written with assistance and guidance.
"""


import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # Disable GPU

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score


In [2]:
# Load the synthetic Kubernetes dataset
data = pd.read_csv('../../k8_synthetic_dataset.csv')

# Load the synthetic Kubernetes dataset
data.head()


Unnamed: 0,cpu_usage,memory_usage,network_io,disk_io,label
0,54.967142,41.71005,337.849431,107.373466,0.0
1,48.617357,44.39819,253.891734,92.133224,0.0
2,56.476885,57.472936,343.480296,100.574896,0.0
3,65.230299,56.103703,367.781893,125.569037,0.0
4,47.658466,49.790984,320.671745,103.821981,0.0


In [3]:
# Extract features and labels
X = data[['cpu_usage', 'memory_usage', 'network_io', 'disk_io']].values
y = data['label'].values.astype(int)


In [4]:
# Split dataset into training and testing (70% - 30%)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

print(f"Training samples: {len(X_train)}")
print(f"Testing samples: {len(X_test)}")


Training samples: 210
Testing samples: 90


In [5]:
# Define Q-network
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(2, activation='linear')  # Q-values for actions: 0 (Normal), 1 (Anomaly)
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')


In [7]:
# DQL settings
episodes = 5
epsilon = 0.9  # Exploration rate
gamma = 0.95   # Discount factor


In [8]:
# Train the Q-network using tabular data
for episode in range(episodes):
    print(f"Episode {episode + 1}/{episodes}")
    
    for i in range(len(X_train)):
        state = np.reshape(X_train[i], [1, X_train.shape[1]])

        # Epsilon-greedy action selection
        if np.random.rand() < epsilon:
            action = np.random.randint(2)  # Explore
        else:
            q_values = model.predict(state, verbose=0)
            action = np.argmax(q_values[0])  # Exploit

        # Assign reward
        reward = 1 if action == y_train[i] else -1

        # Calculate target using Bellman equation
        target = reward
        if i < len(X_train) - 1:
            next_state = np.reshape(X_train[i + 1], [1, X_train.shape[1]])
            next_q_values = model.predict(next_state, verbose=0)
            target += gamma * np.amax(next_q_values[0])

        # Update Q-value for the selected action
        q_values = model.predict(state, verbose=0)
        q_values[0][action] = target

        model.fit(state, q_values, epochs=1, verbose=0)
    
    # Reduce exploration over time
    epsilon *= 0.95
    print(f"Updated epsilon: {epsilon:.4f}")


Episode 1/5
Updated epsilon: 0.8550
Episode 2/5
Updated epsilon: 0.8122
Episode 3/5
Updated epsilon: 0.7716
Episode 4/5
Updated epsilon: 0.7331
Episode 5/5
Updated epsilon: 0.6964


In [9]:
# Predict class using the trained Q-network
y_pred = []

for state in X_test:
    q_values = model.predict(np.reshape(state, [1, X_test.shape[1]]), verbose=0)

    # Use argmax to choose the class with the highest predicted Q-value
    # 0 = Normal, 1 = Anomaly
    predicted_action = np.argmax(q_values[0])
    
    y_pred.append(predicted_action)


In [10]:
# Generate classification report
report = classification_report(y_test, y_pred, target_names=['Normal', 'Anomaly'], digits=4)

# Calculate Macro Average F1-Score
f1_macro = f1_score(y_test, y_pred, average='macro')

print("📊 Final Deep Q-Learning Performance:\n")
print(report)
print(f"🔍 Macro Average F1-Score: {f1_macro:.4f}")


📊 Final Deep Q-Learning Performance:

              precision    recall  f1-score   support

      Normal     0.8947    0.6145    0.7286        83
     Anomaly     0.0303    0.1429    0.0500         7

    accuracy                         0.5778        90
   macro avg     0.4625    0.3787    0.3893        90
weighted avg     0.8275    0.5778    0.6758        90

🔍 Macro Average F1-Score: 0.3893
