In [8]:
!pip install optuna



# Service 1

In [None]:
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load dataset
df = pd.read_csv("Service1.csv")

# Define input features and output targets
X = df[['latency_ms', 'cpu_allocated', 'memory_allocated', 'cpu_usage_pct', 'memory_usage_pct']]
y = df[['cpu_usage_pct', 'memory_usage_pct']]

# Split into train (70%), validation (15%), test (15%)
X_train_full, X_temp, y_train_full, y_temp = train_test_split(X, y, test_size=0.30, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)

print(f"Train set size: {len(X_train_full)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

# Define Optuna objective function
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
    }
    model = MultiOutputRegressor(XGBRegressor(**params, random_state=42, verbosity=0))
    model.fit(X_train_full, y_train_full)
    preds = model.predict(X_val)
    return mean_squared_error(y_val, preds)

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
print("Best Parameters from Optuna:", study.best_params)

# Train final model on train + validation sets
X_train_combined = pd.concat([X_train_full, X_val])
y_train_combined = pd.concat([y_train_full, y_val])

best_params = study.best_params
final_model = MultiOutputRegressor(XGBRegressor(**best_params, random_state=42))
final_model.fit(X_train_combined, y_train_combined)

# Evaluate on test set
predictions = final_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
print(f"\nTest MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# Predict on a new sample (optional)
sample_input = pd.DataFrame([[300,0.25,512, 45, 60]], columns=['latency_ms', 'cpu_allocated', 'memory_allocated', 'cpu_usage_pct', 'memory_usage_pct'])
predicted_allocation = final_model.predict(sample_input)
print(f"\nSample Input Prediction (CPU, Memory): {predicted_allocation[0]}")


[I 2025-05-05 07:46:29,554] A new study created in memory with name: no-name-7eab056c-64ec-48c2-9363-75c19ee3dd00


Train set size: 7000
Validation set size: 1500
Test set size: 1500


[I 2025-05-05 07:46:30,477] Trial 0 finished with value: 0.011159097775816917 and parameters: {'n_estimators': 360, 'max_depth': 7, 'learning_rate': 0.10025836706344864, 'subsample': 0.855654903506101, 'colsample_bytree': 0.7680487763553826}. Best is trial 0 with value: 0.011159097775816917.
[I 2025-05-05 07:46:34,430] Trial 1 finished with value: 0.012106318026781082 and parameters: {'n_estimators': 380, 'max_depth': 10, 'learning_rate': 0.05143624771147402, 'subsample': 0.7897656843036991, 'colsample_bytree': 0.8653594971197125}. Best is trial 0 with value: 0.011159097775816917.
[I 2025-05-05 07:46:39,052] Trial 2 finished with value: 0.01444062776863575 and parameters: {'n_estimators': 353, 'max_depth': 10, 'learning_rate': 0.2077432258242544, 'subsample': 0.7788024989252733, 'colsample_bytree': 0.8412386295049843}. Best is trial 0 with value: 0.011159097775816917.
[I 2025-05-05 07:46:39,404] Trial 3 finished with value: 0.010905393399298191 and parameters: {'n_estimators': 138, 'ma

Best Parameters from Optuna: {'n_estimators': 286, 'max_depth': 5, 'learning_rate': 0.04240863170733056, 'subsample': 0.9874681227091663, 'colsample_bytree': 0.9505032064375454}

Test MSE: 0.0077
Test RMSE: 0.0880

Sample Input Prediction (CPU, Memory): [79.64726  31.038286]


# Service 2

In [10]:
import pandas as pd
import optuna
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Load dataset
df = pd.read_csv("Service2.csv")

# Define input features and output targets
X = df[['latency_ms', 'cpu_allocated', 'memory_allocated', 'cpu_usage_pct', 'memory_usage_pct']]
y = df[['cpu_allocated', 'memory_allocated']]

# Split into train (70%), validation (15%), test (15%)
X_train_full, X_temp, y_train_full, y_temp = train_test_split(X, y, test_size=0.30, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.50, random_state=42)

print(f"Train set size: {len(X_train_full)}")
print(f"Validation set size: {len(X_val)}")
print(f"Test set size: {len(X_test)}")

# Define Optuna objective function
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
    }
    model = MultiOutputRegressor(XGBRegressor(**params, random_state=42, verbosity=0))
    model.fit(X_train_full, y_train_full)
    preds = model.predict(X_val)
    return mean_squared_error(y_val, preds)

# Run Optuna optimization
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=30)
print("Best Parameters from Optuna:", study.best_params)

# Train final model on train + validation sets
X_train_combined = pd.concat([X_train_full, X_val])
y_train_combined = pd.concat([y_train_full, y_val])

best_params = study.best_params
final_model = MultiOutputRegressor(XGBRegressor(**best_params, random_state=42))
final_model.fit(X_train_combined, y_train_combined)

# Evaluate on test set
predictions = final_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
print(f"\nTest MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# Predict on a new sample (optional)
sample_input = pd.DataFrame([[300,0.25, 500, 45, 60]], columns=['latency_ms', 'cpu_allocated', 'memory_allocated', 'cpu_usage_pct', 'memory_usage_pct'])
predicted_allocation = final_model.predict(sample_input)
print(f"\nSample Input Prediction (CPU, Memory): {predicted_allocation[0]}")


[I 2025-05-05 08:05:42,426] A new study created in memory with name: no-name-ec6c209a-81a0-49f5-811b-0c6edbb455e7


Train set size: 7000
Validation set size: 1500
Test set size: 1500


[I 2025-05-05 08:05:42,955] Trial 0 finished with value: 1.0692851543426514 and parameters: {'n_estimators': 405, 'max_depth': 5, 'learning_rate': 0.16047019899306242, 'subsample': 0.746845180620814, 'colsample_bytree': 0.8802228716741932}. Best is trial 0 with value: 1.0692851543426514.
[I 2025-05-05 08:05:43,392] Trial 1 finished with value: 0.9658798575401306 and parameters: {'n_estimators': 166, 'max_depth': 8, 'learning_rate': 0.12659433073193643, 'subsample': 0.8172612061260072, 'colsample_bytree': 0.9700480650586986}. Best is trial 1 with value: 0.9658798575401306.
[I 2025-05-05 08:05:43,853] Trial 2 finished with value: 1.2115553617477417 and parameters: {'n_estimators': 410, 'max_depth': 4, 'learning_rate': 0.23022813670049627, 'subsample': 0.8835620205487946, 'colsample_bytree': 0.9285258391465312}. Best is trial 1 with value: 0.9658798575401306.
[I 2025-05-05 08:05:45,171] Trial 3 finished with value: 1.0734285116195679 and parameters: {'n_estimators': 436, 'max_depth': 9, '

Best Parameters from Optuna: {'n_estimators': 465, 'max_depth': 7, 'learning_rate': 0.029464046555470448, 'subsample': 0.8272020582423519, 'colsample_bytree': 0.8880107644386758}

Test MSE: 0.7573
Test RMSE: 0.8702

Sample Input Prediction (CPU, Memory): [2.7600870e-01 7.4864545e+02]
