# Service 1

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from collections import deque

# Load and preprocess data
df = pd.read_csv("Service1.csv")

features = ['latency_ms', 'cpu_usage_pct', 'memory_usage_pct', 'cpu_allocated', 'memory_allocated']
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

X = df[features].values
cpu_usage = df['cpu_usage_pct'].values
mem_usage = df['memory_usage_pct'].values

# Train-test split
X_train, X_test, cpu_train, cpu_test, mem_train, mem_test = train_test_split(
    X, cpu_usage, mem_usage, test_size=0.2, random_state=42
)

# Discrete action space
action_space = [
    (-1, -1), (-1, 0), (-1, 1),
    (0, -1),  (0, 0),  (0, 1),
    (1, -1),  (1, 0),  (1, 1)
]
n_actions = len(action_space)

# Q-Network definition
class QNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(QNetwork, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.net(x)

# Initialize everything
memory = deque(maxlen=10000)
gamma = 0.99
epsilon = 1.0
epsilon_decay = 0.995
epsilon_min = 0.1
batch_size = 64
lr = 1e-3
n_episodes = 100

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = QNetwork(input_dim=5, output_dim=n_actions).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.MSELoss()

# --- Training Loop ---
for episode in range(n_episodes):
    idx = np.random.randint(len(X_train))
    state_np = X_train[idx]
    state = torch.FloatTensor(state_np).unsqueeze(0).to(device)
    true_cpu = cpu_train[idx]
    true_mem = mem_train[idx]

    for _ in range(10):
        if random.random() < epsilon:
            action_idx = random.randint(0, n_actions - 1)
        else:
            with torch.no_grad():
                q_values = model(state)
                action_idx = torch.argmax(q_values).item()

        cpu_adj, mem_adj = action_space[action_idx]
        predicted_cpu = np.clip(true_cpu + cpu_adj, 0, 1)
        predicted_mem = np.clip(true_mem + mem_adj, 0, 1)

        # Synthetic latency update
        new_latency = state[0][0].item() + 0.05 * cpu_adj - 0.04 * mem_adj
        new_latency = np.clip(new_latency, 0, 1)

        reward = -abs(new_latency - state[0][0].item()) - 0.01 * (predicted_cpu + predicted_mem)

        next_state = torch.FloatTensor([
            new_latency, predicted_cpu, predicted_mem, state[0][3].item(), state[0][4].item()
        ]).unsqueeze(0).to(device)

        memory.append((state, action_idx, reward, next_state))
        state = next_state

    # Learning
    loss_val = None
    if len(memory) >= batch_size:
        batch = random.sample(memory, batch_size)
        states_b, actions_b, rewards_b, next_states_b = zip(*batch)

        states_t = torch.cat(states_b).to(device)
        actions_t = torch.LongTensor(actions_b).unsqueeze(1).to(device)
        rewards_t = torch.FloatTensor(rewards_b).unsqueeze(1).to(device)
        next_states_t = torch.cat(next_states_b).to(device)

        q_values = model(states_t).gather(1, actions_t)
        with torch.no_grad():
            max_next_q = model(next_states_t).max(1)[0].unsqueeze(1)
            target = rewards_t + gamma * max_next_q

        loss = loss_fn(q_values, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_val = loss.item()

    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    if episode % 10 == 0 and loss_val is not None:
        print(f"Episode {episode}, Loss: {loss_val:.4f}, Epsilon: {epsilon:.2f}")

# --- Evaluation on Test Set (Accuracy as avg reward) ---
total_reward = 0
n_test_episodes = 20

for _ in range(n_test_episodes):
    idx = np.random.randint(len(X_test))
    state_np = X_test[idx]
    state = torch.FloatTensor(state_np).unsqueeze(0).to(device)
    true_cpu = cpu_test[idx]
    true_mem = mem_test[idx]

    episode_reward = 0
    for _ in range(5):
        with torch.no_grad():
            q_values = model(state)
            action_idx = torch.argmax(q_values).item()

        cpu_adj, mem_adj = action_space[action_idx]
        predicted_cpu = np.clip(true_cpu + cpu_adj, 0, 1)
        predicted_mem = np.clip(true_mem + mem_adj, 0, 1)

        new_latency = state[0][0].item() + 0.05 * cpu_adj - 0.04 * mem_adj
        new_latency = np.clip(new_latency, 0, 1)

        reward = -abs(new_latency - state[0][0].item()) - 0.01 * (predicted_cpu + predicted_mem)
        episode_reward += reward

        state = torch.FloatTensor([
            new_latency, predicted_cpu, predicted_mem, state[0][3].item(), state[0][4].item()
        ]).unsqueeze(0).to(device)

    total_reward += episode_reward

avg_reward = total_reward / n_test_episodes
print(f"\nAverage Test Reward: {avg_reward:.4f} (Higher is better)")


Episode 10, Loss: 0.0115, Epsilon: 0.95
Episode 20, Loss: 0.0026, Epsilon: 0.90
Episode 30, Loss: 0.0020, Epsilon: 0.86
Episode 40, Loss: 0.0008, Epsilon: 0.81
Episode 50, Loss: 0.0004, Epsilon: 0.77
Episode 60, Loss: 0.0005, Epsilon: 0.74
Episode 70, Loss: 0.0003, Epsilon: 0.70
Episode 80, Loss: 0.0002, Epsilon: 0.67
Episode 90, Loss: 0.0004, Epsilon: 0.63

Average Test Reward: -0.0480 (Higher is better)


In [3]:
from sklearn.metrics import mean_absolute_error

cpu_preds = []
cpu_truth = []
mem_preds = []
mem_truth = []

for _ in range(n_test_episodes):
    idx = np.random.randint(len(X_test))
    state_np = X_test[idx]
    state = torch.FloatTensor(state_np).unsqueeze(0).to(device)
    true_cpu = cpu_test[idx]
    true_mem = mem_test[idx]

    for _ in range(5):
        with torch.no_grad():
            q_values = model(state)
            action_idx = torch.argmax(q_values).item()

        cpu_adj, mem_adj = action_space[action_idx]
        predicted_cpu = np.clip(true_cpu + cpu_adj, 0, 1)
        predicted_mem = np.clip(true_mem + mem_adj, 0, 1)

        # Store for MAE computation
        cpu_preds.append(predicted_cpu)
        cpu_truth.append(true_cpu)
        mem_preds.append(predicted_mem)
        mem_truth.append(true_mem)

        # Update state
        new_latency = state[0][0].item() + 0.05 * cpu_adj - 0.04 * mem_adj
        new_latency = np.clip(new_latency, 0, 1)

        state = torch.FloatTensor([
            new_latency, predicted_cpu, predicted_mem, state[0][3].item(), state[0][4].item()
        ]).unsqueeze(0).to(device)

# --- Compute MAE ---
cpu_mae = mean_absolute_error(cpu_truth, cpu_preds)
mem_mae = mean_absolute_error(mem_truth, mem_preds)

print(f"CPU MAE: {cpu_mae:.4f}")
print(f"Memory MAE: {mem_mae:.4f}")


CPU MAE: 0.3776
Memory MAE: 0.1480


# Service 2

In [4]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from collections import deque

# Load and preprocess data
df = pd.read_csv("Service2.csv")

features = ['latency_ms', 'cpu_usage_pct', 'memory_usage_pct', 'cpu_allocated', 'memory_allocated']
scaler = MinMaxScaler()
df[features] = scaler.fit_transform(df[features])

X = df[features].values
cpu_usage = df['cpu_usage_pct'].values
mem_usage = df['memory_usage_pct'].values

# Train-test split
X_train, X_test, cpu_train, cpu_test, mem_train, mem_test = train_test_split(
    X, cpu_usage, mem_usage, test_size=0.2, random_state=42
)

# Discrete action space
action_space = [
    (-1, -1), (-1, 0), (-1, 1),
    (0, -1),  (0, 0),  (0, 1),
    (1, -1),  (1, 0),  (1, 1)
]
n_actions = len(action_space)

# Q-Network definition
class QNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(QNetwork, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, output_dim)
        )

    def forward(self, x):
        return self.net(x)

# Initialize everything
memory = deque(maxlen=10000)
gamma = 0.99
epsilon = 1.0
epsilon_decay = 0.995
epsilon_min = 0.1
batch_size = 64
lr = 1e-3
n_episodes = 100

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = QNetwork(input_dim=5, output_dim=n_actions).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.MSELoss()

# --- Training Loop ---
for episode in range(n_episodes):
    idx = np.random.randint(len(X_train))
    state_np = X_train[idx]
    state = torch.FloatTensor(state_np).unsqueeze(0).to(device)
    true_cpu = cpu_train[idx]
    true_mem = mem_train[idx]

    for _ in range(10):
        if random.random() < epsilon:
            action_idx = random.randint(0, n_actions - 1)
        else:
            with torch.no_grad():
                q_values = model(state)
                action_idx = torch.argmax(q_values).item()

        cpu_adj, mem_adj = action_space[action_idx]
        predicted_cpu = np.clip(true_cpu + cpu_adj, 0, 1)
        predicted_mem = np.clip(true_mem + mem_adj, 0, 1)

        # Synthetic latency update
        new_latency = state[0][0].item() + 0.05 * cpu_adj - 0.04 * mem_adj
        new_latency = np.clip(new_latency, 0, 1)

        reward = -abs(new_latency - state[0][0].item()) - 0.01 * (predicted_cpu + predicted_mem)

        next_state = torch.FloatTensor([
            new_latency, predicted_cpu, predicted_mem, state[0][3].item(), state[0][4].item()
        ]).unsqueeze(0).to(device)

        memory.append((state, action_idx, reward, next_state))
        state = next_state

    # Learning
    loss_val = None
    if len(memory) >= batch_size:
        batch = random.sample(memory, batch_size)
        states_b, actions_b, rewards_b, next_states_b = zip(*batch)

        states_t = torch.cat(states_b).to(device)
        actions_t = torch.LongTensor(actions_b).unsqueeze(1).to(device)
        rewards_t = torch.FloatTensor(rewards_b).unsqueeze(1).to(device)
        next_states_t = torch.cat(next_states_b).to(device)

        q_values = model(states_t).gather(1, actions_t)
        with torch.no_grad():
            max_next_q = model(next_states_t).max(1)[0].unsqueeze(1)
            target = rewards_t + gamma * max_next_q

        loss = loss_fn(q_values, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_val = loss.item()

    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    if episode % 10 == 0 and loss_val is not None:
        print(f"Episode {episode}, Loss: {loss_val:.4f}, Epsilon: {epsilon:.2f}")

# --- Evaluation on Test Set (Accuracy as avg reward) ---
total_reward = 0
n_test_episodes = 20

for _ in range(n_test_episodes):
    idx = np.random.randint(len(X_test))
    state_np = X_test[idx]
    state = torch.FloatTensor(state_np).unsqueeze(0).to(device)
    true_cpu = cpu_test[idx]
    true_mem = mem_test[idx]

    episode_reward = 0
    for _ in range(5):
        with torch.no_grad():
            q_values = model(state)
            action_idx = torch.argmax(q_values).item()

        cpu_adj, mem_adj = action_space[action_idx]
        predicted_cpu = np.clip(true_cpu + cpu_adj, 0, 1)
        predicted_mem = np.clip(true_mem + mem_adj, 0, 1)

        new_latency = state[0][0].item() + 0.05 * cpu_adj - 0.04 * mem_adj
        new_latency = np.clip(new_latency, 0, 1)

        reward = -abs(new_latency - state[0][0].item()) - 0.01 * (predicted_cpu + predicted_mem)
        episode_reward += reward

        state = torch.FloatTensor([
            new_latency, predicted_cpu, predicted_mem, state[0][3].item(), state[0][4].item()
        ]).unsqueeze(0).to(device)

    total_reward += episode_reward

avg_reward = total_reward / n_test_episodes
print(f"\nAverage Test Reward: {avg_reward:.4f} (Higher is better)")


Episode 10, Loss: 0.0057, Epsilon: 0.95
Episode 20, Loss: 0.0037, Epsilon: 0.90
Episode 30, Loss: 0.0172, Epsilon: 0.86
Episode 40, Loss: 0.0713, Epsilon: 0.81
Episode 50, Loss: 0.3977, Epsilon: 0.77
Episode 60, Loss: 1.1821, Epsilon: 0.74
Episode 70, Loss: 3.8682, Epsilon: 0.70
Episode 80, Loss: 11.0945, Epsilon: 0.67
Episode 90, Loss: 32.5011, Epsilon: 0.63

Average Test Reward: -0.1500 (Higher is better)


In [5]:
from sklearn.metrics import mean_absolute_error

cpu_preds = []
cpu_truth = []
mem_preds = []
mem_truth = []

for _ in range(n_test_episodes):
    idx = np.random.randint(len(X_test))
    state_np = X_test[idx]
    state = torch.FloatTensor(state_np).unsqueeze(0).to(device)
    true_cpu = cpu_test[idx]
    true_mem = mem_test[idx]

    for _ in range(5):
        with torch.no_grad():
            q_values = model(state)
            action_idx = torch.argmax(q_values).item()

        cpu_adj, mem_adj = action_space[action_idx]
        predicted_cpu = np.clip(true_cpu + cpu_adj, 0, 1)
        predicted_mem = np.clip(true_mem + mem_adj, 0, 1)

        # Store for MAE computation
        cpu_preds.append(predicted_cpu)
        cpu_truth.append(true_cpu)
        mem_preds.append(predicted_mem)
        mem_truth.append(true_mem)

        # Update state
        new_latency = state[0][0].item() + 0.05 * cpu_adj - 0.04 * mem_adj
        new_latency = np.clip(new_latency, 0, 1)

        state = torch.FloatTensor([
            new_latency, predicted_cpu, predicted_mem, state[0][3].item(), state[0][4].item()
        ]).unsqueeze(0).to(device)

# --- Compute MAE ---
cpu_mae = mean_absolute_error(cpu_truth, cpu_preds)
mem_mae = mean_absolute_error(mem_truth, mem_preds)

print(f"CPU MAE: {cpu_mae:.4f}")
print(f"Memory MAE: {mem_mae:.4f}")


CPU MAE: 0.7317
Memory MAE: 0.9916
