<a href="https://colab.research.google.com/github/e19166/e19-4yp-Dynamic-Multi-Dimensional-Resource-Orchestration-in-Kubernetes/blob/main/Latency_Models/svr/svr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Service 1

In [2]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.2-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.7/242.7 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.2 colorlog-6.9.0 optuna-4.4.0


In [6]:
import pandas as pd
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import optuna

# Load dataset
df = pd.read_csv("/content/service-1-deployment_dataset.csv")

# Preprocess timestamp
df["Timestamp"] = pd.to_datetime(df["Timestamp"], format = 'mixed')
df = df.sort_values("Timestamp")

# Feature Engineering
df["cpu_allocated"] = df["CPU Request"]
df["memory_allocated"] = df["Memory Request"]
df["cpu_usage_pct"] = df["CPU Usage"] / df["CPU Limit"]
df["memory_usage_pct"] = df["Memory Usage"] / df["Memory Limit"]
df["request_rate_rps"] = df["Request Rate"]
df["latency_p95_t"] = df["Latency"].shift(1)
df["burstiness_score"] = df["Request Rate"].rolling(5).max() - df["Request Rate"].rolling(5).mean()

# Drop rows with NaNs caused by shift/rolling
df.dropna(inplace=True)

# Define features and target
features = [
    "cpu_allocated", "memory_allocated",
    "cpu_usage_pct", "memory_usage_pct",
    "request_rate_rps", "latency_p95_t",
    "burstiness_score"
]
X = df[features]
y = df["Latency"]

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=False
)

# Optuna objective
def objective(trial):
    gamma = trial.suggest_loguniform("gamma", 1e-4, 1.0)
    alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-1)
    learning_rate = trial.suggest_categorical("learning_rate", ["constant", "optimal", "invscaling", "adaptive"])
    eta0 = trial.suggest_loguniform("eta0", 1e-4, 0.1)

    rbf_feature = RBFSampler(gamma=gamma, random_state=42)
    svr_model = SGDRegressor(
        alpha=alpha,
        learning_rate=learning_rate,
        eta0=eta0,
        max_iter=1_000,
        tol=1e-3,
        random_state=42
    )

    pipeline = make_pipeline(rbf_feature, svr_model)
    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    return mean_absolute_error(y_test, preds)

# Run hyperparameter optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Train best model
best_params = study.best_params

rbf_feature = RBFSampler(gamma=best_params["gamma"], random_state=42)
svr_model = SGDRegressor(
    alpha=best_params["alpha"],
    learning_rate=best_params["learning_rate"],
    eta0=best_params["eta0"],
    max_iter=1_000,
    tol=1e-3,
    random_state=42
)

final_pipeline = make_pipeline(rbf_feature, svr_model)
final_pipeline.fit(X_train, y_train)
y_pred = final_pipeline.predict(X_test)

# Evaluate
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Best hyperparameters:", best_params)
print(f"Final MAE: {mae:.4f}")
print(f"Final R² Score: {r2:.4f}")


[I 2025-06-30 05:00:33,069] A new study created in memory with name: no-name-f884dfea-97a2-45f1-bf7b-8754beb865a8
[I 2025-06-30 05:00:33,144] Trial 0 finished with value: 0.0013821951550477803 and parameters: {'gamma': 0.10770083640735646, 'alpha': 2.0644106860382672e-05, 'learning_rate': 'constant', 'eta0': 0.0002464062097868806}. Best is trial 0 with value: 0.0013821951550477803.
[I 2025-06-30 05:00:33,216] Trial 1 finished with value: 0.0013500575865016675 and parameters: {'gamma': 0.053711740879546506, 'alpha': 0.0713847570097256, 'learning_rate': 'optimal', 'eta0': 0.09393806091062405}. Best is trial 1 with value: 0.0013500575865016675.
[I 2025-06-30 05:00:33,288] Trial 2 finished with value: 0.002296905661025671 and parameters: {'gamma': 0.0021249519496179333, 'alpha': 1.8738712537709903e-06, 'learning_rate': 'invscaling', 'eta0': 0.00047288763839655563}. Best is trial 1 with value: 0.0013500575865016675.
[I 2025-06-30 05:00:33,467] Trial 3 finished with value: 0.4321719169840857

Best hyperparameters: {'gamma': 0.010234944069217041, 'alpha': 5.072547320253634e-06, 'learning_rate': 'constant', 'eta0': 0.024025523262885883}
Final MAE: 0.0003
Final R² Score: -1003.7826


# Service 2

In [8]:
import pandas as pd
import numpy as np
from sklearn.kernel_approximation import RBFSampler
from sklearn.linear_model import SGDRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import optuna

# Load dataset
df = pd.read_csv("/content/service-2-deployment_dataset.csv")

# Preprocess timestamp
df["Timestamp"] = pd.to_datetime(df["Timestamp"], format = 'mixed')
df = df.sort_values("Timestamp")

# Feature Engineering
df["cpu_allocated"] = df["CPU Request"]
df["memory_allocated"] = df["Memory Request"]
df["cpu_usage_pct"] = df["CPU Usage"] / df["CPU Limit"]
df["memory_usage_pct"] = df["Memory Usage"] / df["Memory Limit"]
df["request_rate_rps"] = df["Request Rate"]
df["latency_p95_t"] = df["Latency"].shift(1)
df["burstiness_score"] = df["Request Rate"].rolling(5).max() - df["Request Rate"].rolling(5).mean()

# Drop rows with NaNs caused by shift/rolling
df.dropna(inplace=True)

# Define features and target
features = [
    "cpu_allocated", "memory_allocated",
    "cpu_usage_pct", "memory_usage_pct",
    "request_rate_rps", "latency_p95_t",
    "burstiness_score"
]
X = df[features]
y = df["Latency"]

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, shuffle=False
)

# Optuna objective
def objective(trial):
    gamma = trial.suggest_loguniform("gamma", 1e-4, 1.0)
    alpha = trial.suggest_loguniform("alpha", 1e-6, 1e-1)
    learning_rate = trial.suggest_categorical("learning_rate", ["constant", "optimal", "invscaling", "adaptive"])
    eta0 = trial.suggest_loguniform("eta0", 1e-4, 0.1)

    rbf_feature = RBFSampler(gamma=gamma, random_state=42)
    svr_model = SGDRegressor(
        alpha=alpha,
        learning_rate=learning_rate,
        eta0=eta0,
        max_iter=1_000,
        tol=1e-3,
        random_state=42
    )

    pipeline = make_pipeline(rbf_feature, svr_model)
    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    return mean_absolute_error(y_test, preds)

# Run hyperparameter optimization
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)

# Train best model
best_params = study.best_params

rbf_feature = RBFSampler(gamma=best_params["gamma"], random_state=42)
svr_model = SGDRegressor(
    alpha=best_params["alpha"],
    learning_rate=best_params["learning_rate"],
    eta0=best_params["eta0"],
    max_iter=1_000,
    tol=1e-3,
    random_state=42
)

final_pipeline = make_pipeline(rbf_feature, svr_model)
final_pipeline.fit(X_train, y_train)
y_pred = final_pipeline.predict(X_test)

# Evaluate
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Best hyperparameters:", best_params)
print(f"Final MAE: {mae:.8f}")
print(f"Final R² Score: {r2:.4f}")


[I 2025-06-30 05:06:27,119] A new study created in memory with name: no-name-540d3e3d-49b5-4ced-bcaf-f0168c7fbd4b
[I 2025-06-30 05:06:27,289] Trial 0 finished with value: 1.1236882706841673e-06 and parameters: {'gamma': 0.0046024749106632476, 'alpha': 0.00012554771770763605, 'learning_rate': 'adaptive', 'eta0': 0.04733978938142028}. Best is trial 0 with value: 1.1236882706841673e-06.
[I 2025-06-30 05:06:27,343] Trial 1 finished with value: 1.0786619070208852e-06 and parameters: {'gamma': 0.01829590942747559, 'alpha': 0.004857565940913524, 'learning_rate': 'invscaling', 'eta0': 0.018809440022952626}. Best is trial 1 with value: 1.0786619070208852e-06.
[I 2025-06-30 05:06:27,402] Trial 2 finished with value: 1.3572266388849008e-06 and parameters: {'gamma': 0.0001039691641499061, 'alpha': 2.078173006729501e-06, 'learning_rate': 'invscaling', 'eta0': 0.06646330526348257}. Best is trial 1 with value: 1.0786619070208852e-06.
[I 2025-06-30 05:06:29,582] Trial 3 finished with value: 60984064.4

Best hyperparameters: {'gamma': 0.39351183003477935, 'alpha': 0.005907964099798556, 'learning_rate': 'adaptive', 'eta0': 0.03647831942802397}
Final MAE: 0.00000080
Final R² Score: -0.0190
