<a href="https://colab.research.google.com/github/e19166/e19-4yp-Dynamic-Multi-Dimensional-Resource-Orchestration-in-Kubernetes/blob/main/Models/Hoeffding/HoeffdingTree.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Service 1

In [1]:
!pip install river optuna pandas scikit-learn

Collecting river
  Downloading river-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting pandas
  Downloading pandas-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading river-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m16.3 

In [2]:
import pandas as pd
import numpy as np
from river import tree, metrics, preprocessing, compose
from river.utils import rolling
import optuna
from sklearn.model_selection import train_test_split

# Service 1

In [3]:
# Read the Service 1 deployment metrics CSV into a DataFrame.
# The path points at /content (presumably the Colab working directory).
df = pd.read_csv(filepath_or_buffer="/content/service-1-deployment_dataset.csv")

In [5]:
# Parse the Timestamp column to datetimes (format='mixed' lets each value
# carry its own format) and put the rows in chronological order.
df = df.assign(
    Timestamp=pd.to_datetime(df['Timestamp'], format='mixed')
).sort_values(by="Timestamp")

In [6]:
# Derive the model inputs in one pass:
#   - calendar fields taken from the timestamp,
#   - utilisation ratios (usage / limit); despite the `_pct` suffix these are
#     plain fractions, not multiplied by 100,
#   - shorter aliases for the request rate and the requested CPU / memory.
# NOTE(review): a zero limit would yield inf in the ratios - confirm the
# dataset never contains one.
df = df.assign(
    hour=df['Timestamp'].dt.hour,
    dayofweek=df['Timestamp'].dt.dayofweek,
    cpu_usage_pct=df['CPU Usage'] / df['CPU Limit'],
    memory_usage_pct=df['Memory Usage'] / df['Memory Limit'],
    request_rate_rps=df['Request Rate'],
    cpu_allocated=df['CPU Request'],
    memory_allocated=df['Memory Request'],
)

In [7]:
# Trailing 5-row mean and standard deviation of the CPU / memory utilisation.
# The first four rows have no full window, so their NaNs are back-filled
# with the first available window value.
df = df.assign(
    cpu_usage_mean_5=df['cpu_usage_pct'].rolling(5).mean().bfill(),
    memory_usage_mean_5=df['memory_usage_pct'].rolling(5).mean().bfill(),
    cpu_usage_std_5=df['cpu_usage_pct'].rolling(5).std().bfill(),
    memory_usage_std_5=df['memory_usage_pct'].rolling(5).std().bfill(),
)

In [11]:
# Targets are the usage observed on the following row (t+1).
# The last row has no successor, so its targets are NaN; dropna() removes it
# together with any other row that has a NaN in any column.
df = df.assign(**{
    'cpu_needed_t+1': df['CPU Usage'].shift(periods=-1),
    'memory_needed_t+1': df['Memory Usage'].shift(periods=-1),
}).dropna()

In [12]:
# Columns fed to the model, grouped by theme (order is preserved).
features = (
    # current utilisation and load
    ['cpu_usage_pct', 'memory_usage_pct', 'request_rate_rps']
    # requested resources
    + ['cpu_allocated', 'memory_allocated']
    # 5-row rolling statistics
    + ['cpu_usage_mean_5', 'memory_usage_mean_5']
    + ['cpu_usage_std_5', 'memory_usage_std_5']
    # calendar context
    + ['hour', 'dayofweek']
)

In [13]:
# One {feature_name: value} dict per row for the streaming model, plus the
# two next-step targets as NumPy arrays aligned with X row by row.
X = df.loc[:, features].to_dict(orient='records')
y_cpu = df['cpu_needed_t+1'].to_numpy()
y_mem = df['memory_needed_t+1'].to_numpy()

In [14]:
def evaluate_tree_cpu(params, inputs=None, targets=None):
    """Evaluate a scaled Hoeffding tree regressor on a stream, test-then-train.

    Each sample is first predicted, the prediction is scored, and only then
    is the sample used to update the model.

    Parameters
    ----------
    params : mapping
        Must provide 'grace_period', 'delta' and 'tau'; they are forwarded to
        ``tree.HoeffdingTreeRegressor`` ('grace_period' is cast to int).
    inputs : iterable of dict, optional
        Feature dicts, one per sample. Defaults to the notebook-level ``X``.
    targets : iterable of float, optional
        Target values aligned with ``inputs``. Defaults to the notebook-level
        ``y_cpu``; pass e.g. ``y_mem`` to score another target without
        duplicating this function.

    Returns
    -------
    tuple
        ``(mae, r2)`` accumulated over every sample that received a prediction.
    """
    # Resolve the defaults at call time so existing calls
    # (`evaluate_tree_cpu(params)`) keep using the notebook globals.
    # `is None` is required: truth-testing a NumPy array raises.
    stream_x = X if inputs is None else inputs
    stream_y = y_cpu if targets is None else targets

    model = compose.Pipeline(
        preprocessing.StandardScaler(),
        tree.HoeffdingTreeRegressor(
            grace_period=int(params['grace_period']),
            delta=params['delta'],
            tau=params['tau']
        )
    )
    mae = metrics.MAE()
    r2 = metrics.R2()

    for xi, yi in zip(stream_x, stream_y):
        # Predict before learning so the score reflects unseen samples only.
        y_pred = model.predict_one(xi)
        if y_pred is not None:
            mae.update(yi, y_pred)
            r2.update(yi, y_pred)
        model.learn_one(xi, yi)

    return mae.get(), r2.get()


In [15]:
def objective(trial):
    """Optuna objective: sample tree hyper-parameters and return the stream MAE.

    Draws 'grace_period' (int, 10-100), 'delta' (log-uniform, 1e-7 to 1e-2)
    and 'tau' (uniform, 1e-5 to 1e-3) from ``trial``, evaluates them with
    ``evaluate_tree_cpu`` and returns only the MAE; the R2 score is discarded.
    """
    # Keep the suggest calls in this order: it is the order the sampler sees.
    grace_period = trial.suggest_int('grace_period', 10, 100)
    delta = trial.suggest_float('delta', 1e-7, 1e-2, log=True)
    tau = trial.suggest_float('tau', 1e-5, 1e-3)

    candidate = dict(grace_period=grace_period, delta=delta, tau=tau)
    return evaluate_tree_cpu(candidate)[0]

# Minimise the stream MAE over 30 trials.
# The sampler is seeded so that Restart & Run All reproduces the same trial
# sequence and best parameters; TPE is Optuna's default sampler for
# single-objective studies, so only the randomness is pinned.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=30)

print("Best parameters:", study.best_params)


[I 2025-06-29 16:17:30,276] A new study created in memory with name: no-name-039fab09-a183-4c85-8f2b-198ac1de7ae1
[I 2025-06-29 16:17:34,262] Trial 0 finished with value: 0.0034473384702946564 and parameters: {'grace_period': 33, 'delta': 0.0013135428080679368, 'tau': 0.000702946374107081}. Best is trial 0 with value: 0.0034473384702946564.
[I 2025-06-29 16:17:36,367] Trial 1 finished with value: 0.0034473384702946564 and parameters: {'grace_period': 79, 'delta': 0.004577830109730474, 'tau': 2.770268300217517e-05}. Best is trial 0 with value: 0.0034473384702946564.
[I 2025-06-29 16:17:39,217] Trial 2 finished with value: 0.0022862192581796295 and parameters: {'grace_period': 34, 'delta': 0.0015208404458674535, 'tau': 0.00044331537748983996}. Best is trial 2 with value: 0.0022862192581796295.
[I 2025-06-29 16:17:45,736] Trial 3 finished with value: 0.0034554899680476043 and parameters: {'grace_period': 13, 'delta': 0.0010880782160020113, 'tau': 0.0009162407492107129}. Best is trial 2 wi

Best parameters: {'grace_period': 41, 'delta': 2.4126617167694733e-05, 'tau': 0.0006186044220216467}
