In [1]:
!pip install optuna
!pip install xgboost

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m10.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


# Service 1

In [4]:
import pandas as pd
import numpy as np
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import optuna
import joblib
import os

ENSEMBLE_SIZE = 5
MODELS_DIR = "model_training/models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Load & preprocess data
df = pd.read_csv("model_training/data/Service1.csv")

# Create features and handle potential issues
X = df[["cpu_usage_pct", "memory_usage_pct", 'latency_ms','memory_allocated', 'cpu_allocated']].copy()

# Add log transformations, handling zeros/small values
X["cpu_log"] = np.log1p(X["cpu_usage_pct"])  # log1p handles zeros safely
X["mem_log"] = np.log1p(X["memory_usage_pct"])

# Calculate ratio safely to avoid division by zero
# Replace zeros with a small value and clip to avoid infinity
eps = 1e-10  # small epsilon value
X["cpu_to_mem"] = X["cpu_usage_pct"] / (X["memory_usage_pct"].replace(0, eps))
X["cpu_to_mem"] = X["cpu_to_mem"].clip(0, 1000)  # Clip to reasonable range

# Replace any remaining infinities with NaN and then fill with a large value
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(9999)  # Use a large but finite number as replacement

y = df[["cpu_usage_pct", "memory_usage_pct"]]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 600),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-3, 10.0, log=True),
        "random_state": 42,
    }

    # Add missing value handling explicitly
    model = XGBRegressor(**params, missing=np.nan)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    rmse = mean_squared_error(y_val, preds)  # Return RMSE instead of MSE
    return rmse

# Train ensemble
for i in range(ENSEMBLE_SIZE):
    print(f"\n🔁 Optimizing model {i+1}/{ENSEMBLE_SIZE}")

    # Resample data for diversity
    X_bootstrap, y_bootstrap = resample(X_train, y_train, random_state=i)

    def boot_objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 500),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.7, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
            "reg_lambda": trial.suggest_float("reg_lambda", 0.1, 10.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 0.1, 10.0),
            "random_state": i,
        }
        # Add missing value handling explicitly
        model = XGBRegressor(**params, missing=np.nan)
        model.fit(X_bootstrap, y_bootstrap)
        preds = model.predict(X_val)
        return mean_squared_error(y_val, preds)  # Return RMSE

    try:
        study = optuna.create_study(direction="minimize")
        study.optimize(boot_objective, n_trials=30)

        print(f"✅ Best trial for model {i+1}: RMSE = {study.best_trial.value:.4f}")
        best_model = XGBRegressor(**study.best_params, missing=np.nan)
        best_model.fit(X_bootstrap, y_bootstrap)
        joblib.dump(best_model, f"{MODELS_DIR}/ensemble_model_{i}.pkl")
    except Exception as e:
        print(f" Error training model {i+1}: {str(e)}")

print("\n✨ Ensemble training complete!")

[I 2025-05-05 08:24:01,309] A new study created in memory with name: no-name-77c04b2e-205e-4a7a-be9b-ffdc4a237dda



🔁 Optimizing model 1/5


[I 2025-05-05 08:24:01,653] Trial 0 finished with value: 0.03416996821761131 and parameters: {'n_estimators': 112, 'max_depth': 4, 'learning_rate': 0.16403386497991557, 'subsample': 0.904079395251346, 'colsample_bytree': 0.7448467956392256, 'reg_lambda': 8.795162411894916, 'reg_alpha': 7.232727849370897}. Best is trial 0 with value: 0.03416996821761131.
[I 2025-05-05 08:24:02,385] Trial 1 finished with value: 0.025406669825315475 and parameters: {'n_estimators': 316, 'max_depth': 5, 'learning_rate': 0.1485831841688683, 'subsample': 0.7190964208278577, 'colsample_bytree': 0.7007255620426903, 'reg_lambda': 7.195511036329591, 'reg_alpha': 6.471230396417753}. Best is trial 1 with value: 0.025406669825315475.
[I 2025-05-05 08:24:05,667] Trial 2 finished with value: 0.009745702147483826 and parameters: {'n_estimators': 194, 'max_depth': 5, 'learning_rate': 0.07972916140500921, 'subsample': 0.8714224331071192, 'colsample_bytree': 0.7708316571461938, 'reg_lambda': 3.739161805707223, 'reg_alpha

✅ Best trial for model 1: RMSE = 0.0081


[I 2025-05-05 08:24:35,576] A new study created in memory with name: no-name-59d61359-144a-4a24-a414-b0b2fc611b88



🔁 Optimizing model 2/5


[I 2025-05-05 08:24:37,538] Trial 0 finished with value: 0.014530627056956291 and parameters: {'n_estimators': 377, 'max_depth': 9, 'learning_rate': 0.1487347806666731, 'subsample': 0.7731826573248057, 'colsample_bytree': 0.9437413492172427, 'reg_lambda': 7.705554693652514, 'reg_alpha': 1.6525850050118003}. Best is trial 0 with value: 0.014530627056956291.
[I 2025-05-05 08:24:37,928] Trial 1 finished with value: 0.014375206083059311 and parameters: {'n_estimators': 132, 'max_depth': 7, 'learning_rate': 0.1420114745284488, 'subsample': 0.8314679949228443, 'colsample_bytree': 0.9921903247092208, 'reg_lambda': 0.929717567560089, 'reg_alpha': 5.646109177504254}. Best is trial 1 with value: 0.014375206083059311.
[I 2025-05-05 08:24:38,375] Trial 2 finished with value: 0.021167241036891937 and parameters: {'n_estimators': 107, 'max_depth': 8, 'learning_rate': 0.1969226597559722, 'subsample': 0.8594167951604742, 'colsample_bytree': 0.9765754130591817, 'reg_lambda': 2.3231750381115104, 'reg_al

✅ Best trial for model 2: RMSE = 0.0089


[I 2025-05-05 08:25:04,501] A new study created in memory with name: no-name-b733fc44-3887-44b6-91d3-b1fd06469e71



🔁 Optimizing model 3/5


[I 2025-05-05 08:25:05,371] Trial 0 finished with value: 0.00998504925519228 and parameters: {'n_estimators': 297, 'max_depth': 10, 'learning_rate': 0.044270188495287625, 'subsample': 0.8066502155293538, 'colsample_bytree': 0.9503483395997685, 'reg_lambda': 8.635312406797713, 'reg_alpha': 4.571346918232657}. Best is trial 0 with value: 0.00998504925519228.
[I 2025-05-05 08:25:06,098] Trial 1 finished with value: 0.01037268154323101 and parameters: {'n_estimators': 172, 'max_depth': 10, 'learning_rate': 0.0926057318194985, 'subsample': 0.961881320645962, 'colsample_bytree': 0.9916452141149037, 'reg_lambda': 2.046530291273023, 'reg_alpha': 2.5266884433704973}. Best is trial 0 with value: 0.00998504925519228.
[I 2025-05-05 08:25:06,687] Trial 2 finished with value: 0.017864521592855453 and parameters: {'n_estimators': 384, 'max_depth': 3, 'learning_rate': 0.07583326624823829, 'subsample': 0.8313029221555959, 'colsample_bytree': 0.8745768775019015, 'reg_lambda': 7.885165843426611, 'reg_alp

✅ Best trial for model 3: RMSE = 0.0086


[I 2025-05-05 08:25:45,652] A new study created in memory with name: no-name-02b3a1c9-4a19-4bcb-a3e3-92ef6a2b1943



🔁 Optimizing model 4/5


[I 2025-05-05 08:25:46,040] Trial 0 finished with value: 0.01679636538028717 and parameters: {'n_estimators': 179, 'max_depth': 9, 'learning_rate': 0.11877575532336121, 'subsample': 0.8849165211648147, 'colsample_bytree': 0.7067387860148038, 'reg_lambda': 2.419622609370354, 'reg_alpha': 9.336758257969365}. Best is trial 0 with value: 0.01679636538028717.
[I 2025-05-05 08:25:48,123] Trial 1 finished with value: 0.012874145992100239 and parameters: {'n_estimators': 466, 'max_depth': 9, 'learning_rate': 0.2985553129619403, 'subsample': 0.7682948897559817, 'colsample_bytree': 0.916469622125999, 'reg_lambda': 4.060864769558426, 'reg_alpha': 1.5110648561772224}. Best is trial 1 with value: 0.012874145992100239.
[I 2025-05-05 08:25:48,417] Trial 2 finished with value: 0.012509548105299473 and parameters: {'n_estimators': 117, 'max_depth': 4, 'learning_rate': 0.07427625931165893, 'subsample': 0.9343151065618789, 'colsample_bytree': 0.8137091089869106, 'reg_lambda': 5.509538621438013, 'reg_alph

✅ Best trial for model 4: RMSE = 0.0090


[I 2025-05-05 08:26:16,000] A new study created in memory with name: no-name-bb3d53ff-5ce9-4384-af94-52d6ece034ee



🔁 Optimizing model 5/5


[I 2025-05-05 08:26:18,024] Trial 0 finished with value: 0.0109377047047019 and parameters: {'n_estimators': 465, 'max_depth': 7, 'learning_rate': 0.23694882970717868, 'subsample': 0.8957539629104173, 'colsample_bytree': 0.7439425731845354, 'reg_lambda': 6.14749289241978, 'reg_alpha': 0.18123235719356262}. Best is trial 0 with value: 0.0109377047047019.
[I 2025-05-05 08:26:20,640] Trial 1 finished with value: 0.02379041723906994 and parameters: {'n_estimators': 333, 'max_depth': 9, 'learning_rate': 0.2772946318704208, 'subsample': 0.7353319800297704, 'colsample_bytree': 0.9921985508855762, 'reg_lambda': 3.429202146044233, 'reg_alpha': 5.369631693845126}. Best is trial 0 with value: 0.0109377047047019.
[I 2025-05-05 08:26:23,126] Trial 2 finished with value: 0.009946524165570736 and parameters: {'n_estimators': 391, 'max_depth': 8, 'learning_rate': 0.07993515206059847, 'subsample': 0.751033236563115, 'colsample_bytree': 0.7648005606129176, 'reg_lambda': 0.8123473381682688, 'reg_alpha': 

✅ Best trial for model 5: RMSE = 0.0088

✨ Ensemble training complete!


# Service 2

In [5]:
import pandas as pd
import numpy as np
from sklearn.utils import resample
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
import optuna
import joblib
import os

ENSEMBLE_SIZE = 5
MODELS_DIR = "model_training/models"
os.makedirs(MODELS_DIR, exist_ok=True)

# Load & preprocess data
df = pd.read_csv("model_training/data/Service2.csv")

# Create features and handle potential issues
X = df[["cpu_usage_pct", "memory_usage_pct", 'latency_ms','memory_allocated', 'cpu_allocated']].copy()

# Add log transformations, handling zeros/small values
X["cpu_log"] = np.log1p(X["cpu_usage_pct"])  # log1p handles zeros safely
X["mem_log"] = np.log1p(X["memory_usage_pct"])

# Calculate ratio safely to avoid division by zero
# Replace zeros with a small value and clip to avoid infinity
eps = 1e-10  # small epsilon value
X["cpu_to_mem"] = X["cpu_usage_pct"] / (X["memory_usage_pct"].replace(0, eps))
X["cpu_to_mem"] = X["cpu_to_mem"].clip(0, 1000)  # Clip to reasonable range

# Replace any remaining infinities with NaN and then fill with a large value
X = X.replace([np.inf, -np.inf], np.nan)
X = X.fillna(9999)  # Use a large but finite number as replacement

y = df[["cpu_usage_pct", "memory_usage_pct"]]

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 600),
        "max_depth": trial.suggest_int("max_depth", 3, 12),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-3, 10.0, log=True),
        "random_state": 42,
    }

    # Add missing value handling explicitly
    model = XGBRegressor(**params, missing=np.nan)
    model.fit(X_train, y_train)
    preds = model.predict(X_val)
    rmse = mean_squared_error(y_val, preds)  # Return RMSE instead of MSE
    return rmse

# Train ensemble
for i in range(ENSEMBLE_SIZE):
    print(f"\n🔁 Optimizing model {i+1}/{ENSEMBLE_SIZE}")

    # Resample data for diversity
    X_bootstrap, y_bootstrap = resample(X_train, y_train, random_state=i)

    def boot_objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 500),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.7, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
            "reg_lambda": trial.suggest_float("reg_lambda", 0.1, 10.0),
            "reg_alpha": trial.suggest_float("reg_alpha", 0.1, 10.0),
            "random_state": i,
        }
        # Add missing value handling explicitly
        model = XGBRegressor(**params, missing=np.nan)
        model.fit(X_bootstrap, y_bootstrap)
        preds = model.predict(X_val)
        return mean_squared_error(y_val, preds)  # Return RMSE

    try:
        study = optuna.create_study(direction="minimize")
        study.optimize(boot_objective, n_trials=30)

        print(f"✅ Best trial for model {i+1}: RMSE = {study.best_trial.value:.4f}")
        best_model = XGBRegressor(**study.best_params, missing=np.nan)
        best_model.fit(X_bootstrap, y_bootstrap)
        joblib.dump(best_model, f"{MODELS_DIR}/ensemble_model_service2_{i}.pkl")
    except Exception as e:
        print(f" Error training model {i+1}: {str(e)}")

print("\n✨ Ensemble training complete!")

[I 2025-05-05 08:36:25,046] A new study created in memory with name: no-name-61de5e5d-3116-4776-baca-edb2fa9dc06e



🔁 Optimizing model 1/5


[I 2025-05-05 08:36:27,257] Trial 0 finished with value: 0.009620869532227516 and parameters: {'n_estimators': 178, 'max_depth': 4, 'learning_rate': 0.040515448630036595, 'subsample': 0.9123494665208131, 'colsample_bytree': 0.9106803817913371, 'reg_lambda': 6.67357983686763, 'reg_alpha': 1.4982960379288546}. Best is trial 0 with value: 0.009620869532227516.
[I 2025-05-05 08:36:29,036] Trial 1 finished with value: 0.02329869195818901 and parameters: {'n_estimators': 464, 'max_depth': 9, 'learning_rate': 0.21017193981486734, 'subsample': 0.9550080367330003, 'colsample_bytree': 0.8530930835826034, 'reg_lambda': 7.664158297093854, 'reg_alpha': 6.0841196297685345}. Best is trial 0 with value: 0.009620869532227516.
[I 2025-05-05 08:36:29,677] Trial 2 finished with value: 0.03947081416845322 and parameters: {'n_estimators': 427, 'max_depth': 3, 'learning_rate': 0.2331521742091111, 'subsample': 0.8536946542464339, 'colsample_bytree': 0.7017168225711717, 'reg_lambda': 5.8627736587265895, 'reg_a

✅ Best trial for model 1: RMSE = 0.0083


[I 2025-05-05 08:36:58,915] A new study created in memory with name: no-name-0d1b5283-20cb-4906-8f4c-a4b3a9798934



🔁 Optimizing model 2/5


[I 2025-05-05 08:36:59,681] Trial 0 finished with value: 0.029865194112062454 and parameters: {'n_estimators': 456, 'max_depth': 3, 'learning_rate': 0.012485441924238063, 'subsample': 0.8416025824684652, 'colsample_bytree': 0.8191153429343976, 'reg_lambda': 7.4262746601185405, 'reg_alpha': 4.384430223076519}. Best is trial 0 with value: 0.029865194112062454.
[I 2025-05-05 08:37:00,316] Trial 1 finished with value: 0.03670264035463333 and parameters: {'n_estimators': 292, 'max_depth': 4, 'learning_rate': 0.20896995788065179, 'subsample': 0.9401376686551679, 'colsample_bytree': 0.94956739834674, 'reg_lambda': 8.73767810024457, 'reg_alpha': 9.957736610353644}. Best is trial 0 with value: 0.029865194112062454.
[I 2025-05-05 08:37:01,451] Trial 2 finished with value: 0.01797560229897499 and parameters: {'n_estimators': 297, 'max_depth': 9, 'learning_rate': 0.22940343713479644, 'subsample': 0.8973872483194598, 'colsample_bytree': 0.7876727349833061, 'reg_lambda': 4.456614303958301, 'reg_alph

✅ Best trial for model 2: RMSE = 0.0089


[I 2025-05-05 08:37:44,220] A new study created in memory with name: no-name-c832f89b-d12d-4043-9369-3c3f501ef65f



🔁 Optimizing model 3/5


[I 2025-05-05 08:37:45,261] Trial 0 finished with value: 0.024876337498426437 and parameters: {'n_estimators': 475, 'max_depth': 4, 'learning_rate': 0.27863928379500463, 'subsample': 0.844074012521304, 'colsample_bytree': 0.9891932822329517, 'reg_lambda': 4.216733976793644, 'reg_alpha': 4.574006133669545}. Best is trial 0 with value: 0.024876337498426437.
[I 2025-05-05 08:37:49,317] Trial 1 finished with value: 0.02388666570186615 and parameters: {'n_estimators': 345, 'max_depth': 8, 'learning_rate': 0.2784044771556426, 'subsample': 0.7476507811343024, 'colsample_bytree': 0.8186505826457413, 'reg_lambda': 1.0162568370782235, 'reg_alpha': 6.091701138661551}. Best is trial 1 with value: 0.02388666570186615.
[I 2025-05-05 08:37:50,788] Trial 2 finished with value: 0.016700707376003265 and parameters: {'n_estimators': 401, 'max_depth': 8, 'learning_rate': 0.26876055651300534, 'subsample': 0.9442971212257059, 'colsample_bytree': 0.7940151124281561, 'reg_lambda': 8.657101928541637, 'reg_alph

✅ Best trial for model 3: RMSE = 0.0086


[I 2025-05-05 08:38:45,367] A new study created in memory with name: no-name-3f07a264-30c9-4995-8a04-d3b9955268cb



🔁 Optimizing model 4/5


[I 2025-05-05 08:38:46,687] Trial 0 finished with value: 0.012498962692916393 and parameters: {'n_estimators': 490, 'max_depth': 5, 'learning_rate': 0.2390331884605708, 'subsample': 0.8126487358521303, 'colsample_bytree': 0.7061442469405864, 'reg_lambda': 6.902755624709227, 'reg_alpha': 0.5841577535724957}. Best is trial 0 with value: 0.012498962692916393.
[I 2025-05-05 08:38:47,427] Trial 1 finished with value: 0.013453004881739616 and parameters: {'n_estimators': 285, 'max_depth': 8, 'learning_rate': 0.0227539090391687, 'subsample': 0.8971791798854891, 'colsample_bytree': 0.8272655450753685, 'reg_lambda': 6.927044360848631, 'reg_alpha': 7.0501119119009426}. Best is trial 0 with value: 0.012498962692916393.
[I 2025-05-05 08:38:48,401] Trial 2 finished with value: 0.015460990369319916 and parameters: {'n_estimators': 330, 'max_depth': 10, 'learning_rate': 0.1759368959511555, 'subsample': 0.8576176642556521, 'colsample_bytree': 0.7694077496776819, 'reg_lambda': 0.40076320513256203, 'reg

✅ Best trial for model 4: RMSE = 0.0082


[I 2025-05-05 08:39:16,097] A new study created in memory with name: no-name-262d23d5-d47f-485f-86ad-5d3a19915ba5



🔁 Optimizing model 5/5


[I 2025-05-05 08:39:16,695] Trial 0 finished with value: 0.013812679797410965 and parameters: {'n_estimators': 265, 'max_depth': 4, 'learning_rate': 0.1197324594930245, 'subsample': 0.8906668991477161, 'colsample_bytree': 0.8108612869544289, 'reg_lambda': 5.9500162186767085, 'reg_alpha': 1.5291418844895701}. Best is trial 0 with value: 0.013812679797410965.
[I 2025-05-05 08:39:17,842] Trial 1 finished with value: 0.02578379213809967 and parameters: {'n_estimators': 392, 'max_depth': 8, 'learning_rate': 0.15085147430371, 'subsample': 0.751355009977616, 'colsample_bytree': 0.9486461109300568, 'reg_lambda': 8.112242529320199, 'reg_alpha': 7.929999438289674}. Best is trial 0 with value: 0.013812679797410965.
[I 2025-05-05 08:39:19,115] Trial 2 finished with value: 0.015554655343294144 and parameters: {'n_estimators': 371, 'max_depth': 10, 'learning_rate': 0.26611398986170437, 'subsample': 0.9969331359989551, 'colsample_bytree': 0.7728031758187418, 'reg_lambda': 0.5679414659925919, 'reg_alp

✅ Best trial for model 5: RMSE = 0.0092

✨ Ensemble training complete!
