In [1]:
!pip install optuna
!pip install xgboost

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


# Service 1

In [9]:
import pandas as pd
import numpy as np
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import optuna
import joblib
import os

ENSEMBLE_SIZE = 5
MODELS_DIR = "model_training/models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Load & preprocess data
df = pd.read_csv("model_training/data/Service1.csv")

# Create features and handle potential issues
X = df[["cpu_usage_pct", "memory_usage_pct"]].copy()

# Add log transformations, handling zeros/small values
X["cpu_log"] = np.log1p(X["cpu_usage_pct"])  # log1p handles zeros safely
X["mem_log"] = np.log1p(X["memory_usage_pct"])

# Calculate ratio safely to avoid division by zero
# Replace zeros with a small value and clip to avoid infinity
eps = 1e-10  # small epsilon value
X["cpu_to_mem"] = X["cpu_usage_pct"] / (X["memory_usage_pct"].replace(0, eps))
X["cpu_to_mem"] = X["cpu_to_mem"].clip(0, 1000)  # Clip to reasonable range

# Replace any remaining infinities with NaN and then fill with a large value
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(9999)  # Use a large but finite number as replacement

y = df["latency_ms"]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 600),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-3, 10.0, log=True),
        "random_state": 42,
    }

    # Add missing value handling explicitly
    model = XGBRegressor(**params, missing=np.nan)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    rmse = mean_squared_error(y_val, preds)  # Return RMSE instead of MSE
    return rmse

# Train ensemble
for i in range(ENSEMBLE_SIZE):
    print(f"\n🔁 Optimizing model {i+1}/{ENSEMBLE_SIZE}")

    # Resample data for diversity
    X_bootstrap, y_bootstrap = resample(X_train, y_train, random_state=i)

    def boot_objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 500),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.7, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
            "reg_lambda": trial.suggest_float("reg_lambda", 0.1, 10.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 0.1, 10.0),
            "random_state": i,
        }
        # Add missing value handling explicitly
        model = XGBRegressor(**params, missing=np.nan)
        model.fit(X_bootstrap, y_bootstrap)
        preds = model.predict(X_val)
        return mean_squared_error(y_val, preds)  # Return RMSE

    try:
        study = optuna.create_study(direction="minimize")
        study.optimize(boot_objective, n_trials=30)

        print(f"✅ Best trial for model {i+1}: RMSE = {study.best_trial.value:.4f}")
        best_model = XGBRegressor(**study.best_params, missing=np.nan)
        best_model.fit(X_bootstrap, y_bootstrap)
        joblib.dump(best_model, f"{MODELS_DIR}/ensemble_model_{i}.pkl")
    except Exception as e:
        print(f" Error training model {i+1}: {str(e)}")

print("\n✨ Ensemble training complete!")

[I 2025-05-05 06:22:30,596] A new study created in memory with name: no-name-44f208bc-af28-44e5-a02e-2b9327079356



🔁 Optimizing model 1/5


[I 2025-05-05 06:22:31,377] Trial 0 finished with value: 3799.947038802802 and parameters: {'n_estimators': 381, 'max_depth': 10, 'learning_rate': 0.22156380591865274, 'subsample': 0.9038496146709745, 'colsample_bytree': 0.8937005361033081, 'reg_lambda': 7.112536577199356, 'reg_alpha': 8.830375244681447}. Best is trial 0 with value: 3799.947038802802.
[I 2025-05-05 06:22:31,948] Trial 1 finished with value: 3806.6508168620926 and parameters: {'n_estimators': 320, 'max_depth': 10, 'learning_rate': 0.2332779876530161, 'subsample': 0.811022803344907, 'colsample_bytree': 0.7376027346365946, 'reg_lambda': 4.018993781286586, 'reg_alpha': 9.870125992884189}. Best is trial 0 with value: 3799.947038802802.
[I 2025-05-05 06:22:32,733] Trial 2 finished with value: 3767.0877099296367 and parameters: {'n_estimators': 397, 'max_depth': 10, 'learning_rate': 0.07118992209602909, 'subsample': 0.8120230078856923, 'colsample_bytree': 0.9965993920788356, 'reg_lambda': 6.007848702497115, 'reg_alpha': 2.163

✅ Best trial for model 1: RMSE = 3410.1777


[I 2025-05-05 06:22:42,994] A new study created in memory with name: no-name-4ab5c13d-1f5d-45b1-9ef5-033d8814ac92
[I 2025-05-05 06:22:43,169] Trial 0 finished with value: 3538.19018958239 and parameters: {'n_estimators': 328, 'max_depth': 3, 'learning_rate': 0.165071067346541, 'subsample': 0.990613091760223, 'colsample_bytree': 0.9499417682665475, 'reg_lambda': 8.768063291173142, 'reg_alpha': 8.373277081377838}. Best is trial 0 with value: 3538.19018958239.



🔁 Optimizing model 2/5


[I 2025-05-05 06:22:43,286] Trial 1 finished with value: 3558.6051958994813 and parameters: {'n_estimators': 186, 'max_depth': 3, 'learning_rate': 0.28812595323624485, 'subsample': 0.8403122641937707, 'colsample_bytree': 0.9680191173437407, 'reg_lambda': 2.044037843450127, 'reg_alpha': 0.5960960061512209}. Best is trial 0 with value: 3538.19018958239.
[I 2025-05-05 06:22:43,855] Trial 2 finished with value: 3673.968732554823 and parameters: {'n_estimators': 425, 'max_depth': 8, 'learning_rate': 0.2799240910708863, 'subsample': 0.7678084798592083, 'colsample_bytree': 0.7101244270376538, 'reg_lambda': 5.756646198099172, 'reg_alpha': 1.4353321828176568}. Best is trial 0 with value: 3538.19018958239.
[I 2025-05-05 06:22:44,306] Trial 3 finished with value: 3674.099404840587 and parameters: {'n_estimators': 487, 'max_depth': 6, 'learning_rate': 0.1993461918676045, 'subsample': 0.821141268648829, 'colsample_bytree': 0.7570902244508205, 'reg_lambda': 3.8092314589799323, 'reg_alpha': 0.1536256

✅ Best trial for model 2: RMSE = 3413.8530


[I 2025-05-05 06:22:54,656] A new study created in memory with name: no-name-43adb67f-03b6-4699-a2e4-5f7abffc8449



🔁 Optimizing model 3/5


[I 2025-05-05 06:22:54,869] Trial 0 finished with value: 3716.3003693595265 and parameters: {'n_estimators': 260, 'max_depth': 5, 'learning_rate': 0.14798260940759184, 'subsample': 0.79790947587255, 'colsample_bytree': 0.8621481747619402, 'reg_lambda': 7.579905878044278, 'reg_alpha': 4.286927269046714}. Best is trial 0 with value: 3716.3003693595265.
[I 2025-05-05 06:22:54,998] Trial 1 finished with value: 3635.197328767781 and parameters: {'n_estimators': 159, 'max_depth': 5, 'learning_rate': 0.17309415943867043, 'subsample': 0.9350358710355967, 'colsample_bytree': 0.909422199314774, 'reg_lambda': 9.983050669061829, 'reg_alpha': 2.55271763583691}. Best is trial 1 with value: 3635.197328767781.
[I 2025-05-05 06:22:55,223] Trial 2 finished with value: 3855.0861897837076 and parameters: {'n_estimators': 192, 'max_depth': 7, 'learning_rate': 0.17859596560885788, 'subsample': 0.9400898053601225, 'colsample_bytree': 0.9700852889144727, 'reg_lambda': 0.7672021470274184, 'reg_alpha': 4.180223

✅ Best trial for model 3: RMSE = 3440.5476


[I 2025-05-05 06:23:03,393] A new study created in memory with name: no-name-34edfc19-e2e1-4f31-aa99-b2e8354dd70d
[I 2025-05-05 06:23:03,570] Trial 0 finished with value: 3556.0602560452467 and parameters: {'n_estimators': 232, 'max_depth': 5, 'learning_rate': 0.03830939234044469, 'subsample': 0.9378833758459371, 'colsample_bytree': 0.7813679463644435, 'reg_lambda': 7.212417745174003, 'reg_alpha': 9.108071796531918}. Best is trial 0 with value: 3556.0602560452467.



🔁 Optimizing model 4/5


[I 2025-05-05 06:23:04,228] Trial 1 finished with value: 4042.8132006504097 and parameters: {'n_estimators': 359, 'max_depth': 9, 'learning_rate': 0.1976969840580976, 'subsample': 0.9587247668856158, 'colsample_bytree': 0.7384950748909953, 'reg_lambda': 0.7340487166442304, 'reg_alpha': 5.270450108455512}. Best is trial 0 with value: 3556.0602560452467.
[I 2025-05-05 06:23:06,728] Trial 2 finished with value: 4035.0342128619804 and parameters: {'n_estimators': 386, 'max_depth': 9, 'learning_rate': 0.1822482293219455, 'subsample': 0.9828859344483731, 'colsample_bytree': 0.9997619175448158, 'reg_lambda': 8.419387155978347, 'reg_alpha': 3.8459267120833553}. Best is trial 0 with value: 3556.0602560452467.
[I 2025-05-05 06:23:07,317] Trial 3 finished with value: 4027.6453686336135 and parameters: {'n_estimators': 488, 'max_depth': 7, 'learning_rate': 0.17618975420018004, 'subsample': 0.9103872289733936, 'colsample_bytree': 0.8838435504592181, 'reg_lambda': 4.780834993978307, 'reg_alpha': 2.9

✅ Best trial for model 4: RMSE = 3454.0128

🔁 Optimizing model 5/5


[I 2025-05-05 06:23:13,143] Trial 0 finished with value: 3742.9898697978147 and parameters: {'n_estimators': 406, 'max_depth': 6, 'learning_rate': 0.20371084162320333, 'subsample': 0.8344018491667179, 'colsample_bytree': 0.7181530084805158, 'reg_lambda': 4.442512323414187, 'reg_alpha': 2.143454088199438}. Best is trial 0 with value: 3742.9898697978147.
[I 2025-05-05 06:23:13,692] Trial 1 finished with value: 3777.2593330859386 and parameters: {'n_estimators': 419, 'max_depth': 8, 'learning_rate': 0.26417785414249595, 'subsample': 0.9356940495494454, 'colsample_bytree': 0.7381937661992904, 'reg_lambda': 9.510755502566196, 'reg_alpha': 5.782161355319541}. Best is trial 0 with value: 3742.9898697978147.
[I 2025-05-05 06:23:14,272] Trial 2 finished with value: 3756.741203600954 and parameters: {'n_estimators': 356, 'max_depth': 9, 'learning_rate': 0.18878457702654647, 'subsample': 0.7042533927202599, 'colsample_bytree': 0.7935920985714289, 'reg_lambda': 8.784949687863165, 'reg_alpha': 8.91

✅ Best trial for model 5: RMSE = 3410.3390

✨ Ensemble training complete!
