<a href="https://colab.research.google.com/github/e19166/e19-4yp-Dynamic-Multi-Dimensional-Resource-Orchestration-in-Kubernetes/blob/main/Models/SDG/Resource_SDG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Service 1

In [1]:
pip install pandas scikit-learn optuna

Collecting optuna
  Downloading optuna-4.4.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.16.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.4.0-py3-none-any.whl (395 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m395.9/395.9 kB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.16.2-py3-none-any.whl (242 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m242.7/242.7 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.16.2 colorlog-6.9.0 optuna-4.4.0


In [3]:
import pandas as pd
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, r2_score
import optuna

In [8]:
# Load and preprocess the dataset
df = pd.read_csv("/content/service-1-deployment_dataset.csv")

In [9]:
# --- Feature Engineering ---

# Convert Timestamp to datetime
df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='mixed')

# Sort by time
df = df.sort_values('Timestamp')

# Time features
df['hour'] = df['Timestamp'].dt.hour
df['dayofweek'] = df['Timestamp'].dt.dayofweek

# Usage percentages
df['cpu_usage_pct'] = df['CPU Usage'] / df['CPU Limit']
df['memory_usage_pct'] = df['Memory Usage'] / df['Memory Limit']
df['request_rate_rps'] = df['Request Rate']

# Rolling trends (example over 5 past rows, you can adjust window)
df['cpu_usage_mean_5'] = df['cpu_usage_pct'].rolling(window=5).mean().fillna(method='bfill')
df['memory_usage_mean_5'] = df['memory_usage_pct'].rolling(window=5).mean().fillna(method='bfill')
df['cpu_usage_std_5'] = df['cpu_usage_pct'].rolling(window=5).std().fillna(method='bfill')
df['memory_usage_std_5'] = df['memory_usage_pct'].rolling(window=5).std().fillna(method='bfill')

# Targets: CPU Needed and Memory Needed = CPU Usage & Memory Usage of next timestamp
df['cpu_needed_t+1'] = df['CPU Usage'].shift(-1)
df['memory_needed_t+1'] = df['Memory Usage'].shift(-1)

# Drop NaN due to rolling/shift
df = df.dropna()

  df['cpu_usage_mean_5'] = df['cpu_usage_pct'].rolling(window=5).mean().fillna(method='bfill')
  df['memory_usage_mean_5'] = df['memory_usage_pct'].rolling(window=5).mean().fillna(method='bfill')
  df['cpu_usage_std_5'] = df['cpu_usage_pct'].rolling(window=5).std().fillna(method='bfill')
  df['memory_usage_std_5'] = df['memory_usage_pct'].rolling(window=5).std().fillna(method='bfill')


In [10]:
# --- Features & Target ---
features = [
    'cpu_usage_pct', 'memory_usage_pct', 'request_rate_rps',
    'CPU Request', 'Memory Request',
    'CPU Limit', 'Memory Limit',
    'cpu_usage_mean_5', 'memory_usage_mean_5',
    'cpu_usage_std_5', 'memory_usage_std_5',
    'hour', 'dayofweek'
]

X = df[features]
y_cpu = df['cpu_needed_t+1']
y_mem = df['memory_needed_t+1']

# Split
X_train, X_test, y_cpu_train, y_cpu_test = train_test_split(X, y_cpu, test_size=0.2, random_state=42)
_, _, y_mem_train, y_mem_test = train_test_split(X, y_mem, test_size=0.2, random_state=42)


In [11]:
# --- Optuna for SGDRegressor ---
def objective(trial):
    alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-1)
    eta0 = trial.suggest_loguniform('eta0', 1e-5, 1e-1)
    max_iter = trial.suggest_int('max_iter', 500, 2000)

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("sgd", SGDRegressor(
            loss='squared_error',
            penalty='l2',
            alpha=alpha,
            eta0=eta0,
            learning_rate='constant',
            max_iter=max_iter,
            random_state=42
        ))
    ])

    pipe.fit(X_train, y_cpu_train)
    preds = pipe.predict(X_test)
    mae = mean_absolute_error(y_cpu_test, preds)
    return mae

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

[I 2025-06-29 15:26:37,462] A new study created in memory with name: no-name-f659cbc8-4d2e-4718-ab39-a32f863c0bc9
  alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-1)
  eta0 = trial.suggest_loguniform('eta0', 1e-5, 1e-1)
[I 2025-06-29 15:26:37,511] Trial 0 finished with value: 8.88849941350912e-05 and parameters: {'alpha': 2.2076094166154712e-05, 'eta0': 0.00020376077107646937, 'max_iter': 1532}. Best is trial 0 with value: 8.88849941350912e-05.
  alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-1)
  eta0 = trial.suggest_loguniform('eta0', 1e-5, 1e-1)
[I 2025-06-29 15:26:37,540] Trial 1 finished with value: 0.00017148015016506355 and parameters: {'alpha': 0.01761074557254192, 'eta0': 0.00012372081498229531, 'max_iter': 1330}. Best is trial 0 with value: 8.88849941350912e-05.
  alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-1)
  eta0 = trial.suggest_loguniform('eta0', 1e-5, 1e-1)
[I 2025-06-29 15:26:37,578] Trial 2 finished with value: 6.863042011124645e-05 and parameters: {'al

In [12]:
# --- Best Model ---
best_params = study.best_params
print("Best Hyperparameters:", best_params)

final_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("sgd", SGDRegressor(
        loss='squared_error',
        penalty='l2',
        alpha=best_params['alpha'],
        eta0=best_params['eta0'],
        learning_rate='constant',
        max_iter=best_params['max_iter'],
        random_state=42
    ))
])

final_pipe.fit(X_train, y_cpu_train)
preds = final_pipe.predict(X_test)

Best Hyperparameters: {'alpha': 0.0004825069531850445, 'eta0': 0.005577074786219012, 'max_iter': 1118}


In [13]:
# Accuracy
mae = mean_absolute_error(y_cpu_test, preds)
r2 = r2_score(y_cpu_test, preds)

print(f"MAE (CPU Prediction): {mae:.4f}")
print(f"R² Score (CPU Prediction): {r2:.4f}")

MAE (CPU Prediction): 0.0001
R² Score (CPU Prediction): 0.9971


In [16]:
# Fit on training data
final_pipe.fit(X_train, y_mem_train)

# Predict on test set
mem_preds = final_pipe.predict(X_test)

# Accuracy metrics
mae_mem = mean_absolute_error(y_mem_test, mem_preds)
r2_mem = r2_score(y_mem_test, mem_preds)

print(f"MAE (Memory Prediction): {mae_mem:.4f}")
print(f"R² Score (Memory Prediction): {r2_mem:.4f}")

MAE (Memory Prediction): 1785783.5771
R² Score (Memory Prediction): 0.9682


# Service 2

In [17]:
# Load and preprocess the dataset
df = pd.read_csv("/content/service-2-deployment_dataset.csv")

In [19]:
# --- Feature Engineering ---

# Convert Timestamp to datetime
df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='mixed')

# Sort by time
df = df.sort_values('Timestamp')

# Time features
df['hour'] = df['Timestamp'].dt.hour
df['dayofweek'] = df['Timestamp'].dt.dayofweek

# Usage percentages
df['cpu_usage_pct'] = df['CPU Usage'] / df['CPU Limit']
df['memory_usage_pct'] = df['Memory Usage'] / df['Memory Limit']
df['request_rate_rps'] = df['Request Rate']

# Rolling trends (example over 5 past rows, you can adjust window)
df['cpu_usage_mean_5'] = df['cpu_usage_pct'].rolling(window=5).mean().fillna(method='bfill')
df['memory_usage_mean_5'] = df['memory_usage_pct'].rolling(window=5).mean().fillna(method='bfill')
df['cpu_usage_std_5'] = df['cpu_usage_pct'].rolling(window=5).std().fillna(method='bfill')
df['memory_usage_std_5'] = df['memory_usage_pct'].rolling(window=5).std().fillna(method='bfill')

# Targets: CPU Needed and Memory Needed = CPU Usage & Memory Usage of next timestamp
df['cpu_needed_t+1'] = df['CPU Usage'].shift(-1)
df['memory_needed_t+1'] = df['Memory Usage'].shift(-1)

# Drop NaN due to rolling/shift
df = df.dropna()

In [20]:
# --- Features & Target ---
features = [
    'cpu_usage_pct', 'memory_usage_pct', 'request_rate_rps',
    'CPU Request', 'Memory Request',
    'CPU Limit', 'Memory Limit',
    'cpu_usage_mean_5', 'memory_usage_mean_5',
    'cpu_usage_std_5', 'memory_usage_std_5',
    'hour', 'dayofweek'
]

X = df[features]
y_cpu = df['cpu_needed_t+1']
y_mem = df['memory_needed_t+1']

# Split
X_train, X_test, y_cpu_train, y_cpu_test = train_test_split(X, y_cpu, test_size=0.2, random_state=42)
_, _, y_mem_train, y_mem_test = train_test_split(X, y_mem, test_size=0.2, random_state=42)


In [21]:
# --- Optuna for SGDRegressor ---
def objective(trial):
    alpha = trial.suggest_loguniform('alpha', 1e-6, 1e-1)
    eta0 = trial.suggest_loguniform('eta0', 1e-5, 1e-1)
    max_iter = trial.suggest_int('max_iter', 500, 2000)

    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("sgd", SGDRegressor(
            loss='squared_error',
            penalty='l2',
            alpha=alpha,
            eta0=eta0,
            learning_rate='constant',
            max_iter=max_iter,
            random_state=42
        ))
    ])

    pipe.fit(X_train, y_cpu_train)
    preds = pipe.predict(X_test)
    mae = mean_absolute_error(y_cpu_test, preds)
    return mae

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)

[I 2025-06-29 15:55:30,739] A new study created in memory with name: no-name-e0c6b283-a7ab-4779-b6b5-2009adbd1ae6
[I 2025-06-29 15:55:30,757] Trial 0 finished with value: 71435198697.07838 and parameters: {'alpha': 0.0033833100651102324, 'eta0': 0.013551228651186707, 'max_iter': 1737}. Best is trial 0 with value: 71435198697.07838.
[I 2025-06-29 15:55:30,771] Trial 1 finished with value: 0.34489869343154655 and parameters: {'alpha': 8.534234262881975e-05, 'eta0': 0.0023545054644681825, 'max_iter': 1919}. Best is trial 1 with value: 0.34489869343154655.
[I 2025-06-29 15:55:30,786] Trial 2 finished with value: 7.498747429545045e-05 and parameters: {'alpha': 0.03355060684856847, 'eta0': 0.00015879181637700572, 'max_iter': 1771}. Best is trial 2 with value: 7.498747429545045e-05.
[I 2025-06-29 15:55:30,801] Trial 3 finished with value: 0.005659597011662616 and parameters: {'alpha': 0.004078043820242971, 'eta0': 1.4527408295650175e-05, 'max_iter': 1112}. Best is trial 2 with value: 7.498747

In [22]:
# --- Best Model ---
best_params = study.best_params
print("Best Hyperparameters:", best_params)

final_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("sgd", SGDRegressor(
        loss='squared_error',
        penalty='l2',
        alpha=best_params['alpha'],
        eta0=best_params['eta0'],
        learning_rate='constant',
        max_iter=best_params['max_iter'],
        random_state=42
    ))
])

final_pipe.fit(X_train, y_cpu_train)
preds = final_pipe.predict(X_test)

Best Hyperparameters: {'alpha': 0.0006901449911859703, 'eta0': 0.0006156651439987768, 'max_iter': 979}
