In [2]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


# Service 1

In [4]:
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load dataset
df = pd.read_csv("Service1.csv")

# Define input features and output targets
X = df[['latency_ms', 'cpu_usage_pct', 'memory_usage_pct']]
y = df[['cpu_allocated', 'memory_allocated']]

# Split into train (70%), validation (15%), test (15%)
X_train_full, X_temp, y_train_full, y_temp = train_test_split(X, y, test_size=0.30, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)

print(f"Train set size: {len(X_train_full)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

# Define Optuna objective function
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
    }
    model = MultiOutputRegressor(XGBRegressor(**params, random_state=42, verbosity=0))
    model.fit(X_train_full, y_train_full)
    preds = model.predict(X_val)
    return mean_squared_error(y_val, preds)

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
print("Best Parameters from Optuna:", study.best_params)

# Train final model on train + validation sets
X_train_combined = pd.concat([X_train_full, X_val])
y_train_combined = pd.concat([y_train_full, y_val])

best_params = study.best_params
final_model = MultiOutputRegressor(XGBRegressor(**best_params, random_state=42))
final_model.fit(X_train_combined, y_train_combined)

# Evaluate on test set
predictions = final_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
print(f"\nTest MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# Predict on a new sample (optional)
sample_input = pd.DataFrame([[300, 45, 60]], columns=['latency_ms', 'cpu_usage_pct', 'memory_usage_pct'])
predicted_allocation = final_model.predict(sample_input)
print(f"\nSample Input Prediction (CPU, Memory): {predicted_allocation[0]}")


[I 2025-05-05 05:23:21,528] A new study created in memory with name: no-name-5df2a79e-6a9a-48ba-b799-199d46aa384d


Train set size: 7000
Validation set size: 1500
Test set size: 1500


[I 2025-05-05 05:23:22,121] Trial 0 finished with value: 2.087468385696411 and parameters: {'n_estimators': 355, 'max_depth': 6, 'learning_rate': 0.1635596850344956, 'subsample': 0.8805197219086345, 'colsample_bytree': 0.9769853631716527}. Best is trial 0 with value: 2.087468385696411.
[I 2025-05-05 05:23:22,651] Trial 1 finished with value: 2.1974427700042725 and parameters: {'n_estimators': 328, 'max_depth': 5, 'learning_rate': 0.18748230776050948, 'subsample': 0.8580518973274707, 'colsample_bytree': 0.7398697191367485}. Best is trial 0 with value: 2.087468385696411.
[I 2025-05-05 05:23:23,010] Trial 2 finished with value: 2.0826940536499023 and parameters: {'n_estimators': 140, 'max_depth': 7, 'learning_rate': 0.059244198289769665, 'subsample': 0.700841388706234, 'colsample_bytree': 0.9195600232404929}. Best is trial 2 with value: 2.0826940536499023.
[I 2025-05-05 05:23:23,375] Trial 3 finished with value: 2.7727108001708984 and parameters: {'n_estimators': 430, 'max_depth': 3, 'lea

Best Parameters from Optuna: {'n_estimators': 356, 'max_depth': 6, 'learning_rate': 0.06501163826011067, 'subsample': 0.7488447516783117, 'colsample_bytree': 0.9121860467272286}

Test MSE: 1.6407
Test RMSE: 1.2809

Sample Input Prediction (CPU, Memory): [3.955172e-01 8.311766e+02]


# Service 2

In [5]:
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load dataset
df = pd.read_csv("Service2.csv")

# Define input features and output targets
X = df[['latency_ms', 'cpu_usage_pct', 'memory_usage_pct']]
y = df[['cpu_allocated', 'memory_allocated']]

# Split into train (70%), validation (15%), test (15%)
X_train_full, X_temp, y_train_full, y_temp = train_test_split(X, y, test_size=0.30, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)

print(f"Train set size: {len(X_train_full)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

# Define Optuna objective function
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
    }
    model = MultiOutputRegressor(XGBRegressor(**params, random_state=42, verbosity=0))
    model.fit(X_train_full, y_train_full)
    preds = model.predict(X_val)
    return mean_squared_error(y_val, preds)

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
print("Best Parameters from Optuna:", study.best_params)

# Train final model on train + validation sets
X_train_combined = pd.concat([X_train_full, X_val])
y_train_combined = pd.concat([y_train_full, y_val])

best_params = study.best_params
final_model = MultiOutputRegressor(XGBRegressor(**best_params, random_state=42))
final_model.fit(X_train_combined, y_train_combined)

# Evaluate on test set
predictions = final_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
print(f"\nTest MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# Predict on a new sample (optional)
sample_input = pd.DataFrame([[300, 45, 60]], columns=['latency_ms', 'cpu_usage_pct', 'memory_usage_pct'])
predicted_allocation = final_model.predict(sample_input)
print(f"\nSample Input Prediction (CPU, Memory): {predicted_allocation[0]}")


[I 2025-05-05 05:24:43,111] A new study created in memory with name: no-name-cf26ff66-f736-48b1-a169-c2fc3ef662d1


Train set size: 7000
Validation set size: 1500
Test set size: 1500


[I 2025-05-05 05:24:43,484] Trial 0 finished with value: 2.0155270099639893 and parameters: {'n_estimators': 121, 'max_depth': 7, 'learning_rate': 0.07740689028917813, 'subsample': 0.8716743342777701, 'colsample_bytree': 0.9326504347162105}. Best is trial 0 with value: 2.0155270099639893.
[I 2025-05-05 05:24:45,913] Trial 1 finished with value: 2.04429292678833 and parameters: {'n_estimators': 497, 'max_depth': 5, 'learning_rate': 0.20266540225439258, 'subsample': 0.8759929807528308, 'colsample_bytree': 0.7581415564546611}. Best is trial 0 with value: 2.0155270099639893.
[I 2025-05-05 05:24:46,352] Trial 2 finished with value: 2.271047592163086 and parameters: {'n_estimators': 167, 'max_depth': 5, 'learning_rate': 0.047323676140146154, 'subsample': 0.8273863448785178, 'colsample_bytree': 0.7644331224878637}. Best is trial 0 with value: 2.0155270099639893.
[I 2025-05-05 05:24:46,651] Trial 3 finished with value: 1.9297618865966797 and parameters: {'n_estimators': 177, 'max_depth': 5, 'l

Best Parameters from Optuna: {'n_estimators': 441, 'max_depth': 9, 'learning_rate': 0.03783641557069581, 'subsample': 0.9486357483425217, 'colsample_bytree': 0.7997289985809171}

Test MSE: 1.7450
Test RMSE: 1.3210

Sample Input Prediction (CPU, Memory): [4.1809377e-01 8.0070978e+02]
