In [1]:
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit

from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
import logging
import optuna
import matplotlib.pyplot as plt

optuna.logging.set_verbosity(optuna.logging.WARNING)

In [24]:
# Paths to service 1 datasets
cpu_path_s1 = "../../results/prometheus_data/service1_cpu_limit_reduction.csv"
memory_path_s1 = "../../results/prometheus_data/new datasets/service1_memory_limit_reduction.csv"
both_path_s1 = "../../results/prometheus_data/service1_both_limits_reduction.csv"

# Import datasets
df_cpu_s1 = pd.read_csv(cpu_path_s1)
df_memory_s1 = pd.read_csv(memory_path_s1)
df_both_s1 = pd.read_csv(both_path_s1)

df_all_s1 = pd.concat([df_cpu_s1, df_memory_s1, df_both_s1], ignore_index=True)
print(df_all_s1.columns)

# Paths to service 2 datasets
cpu_path_s2 = "../../results/prometheus_data/service2_cpu_limit_reduction.csv"
memory_path_s2 = "../../results/prometheus_data/service2_memory_limit_reduction.csv"
both_path_s2 = "../../results/prometheus_data/service2_both_limit_reduction.csv"

# Import datasets
df_cpu_s2 = pd.read_csv(cpu_path_s2)
df_memory_s2 = pd.read_csv(memory_path_s2)
df_both_s2 = pd.read_csv(both_path_s2)

# Combine all three DataFrames
df_all_s2 = pd.concat([df_cpu_s2, df_memory_s2, df_both_s2], ignore_index=True)

# Paths to datasets
cpu_path_hg = "../../results/prometheus_data/hashgen_cpu_limit_reduction.csv"
memory_path_hg = "../../results/prometheus_data/hashgen_memory_limit_reduction.csv"
both_path_hg = "../../results/prometheus_data/hashgen_both_limit_reduction.csv"

# Import datasets
df_cpu_hg = pd.read_csv(cpu_path_hg)
df_memory_hg = pd.read_csv(memory_path_hg)
df_both_hg = pd.read_csv(both_path_hg)

# Combine all three DataFrames
df_all_hg = pd.concat([df_cpu_hg, df_memory_hg, df_both_hg], ignore_index=True)

# Paths to datasets
cpu_path_rp = "../../results/prometheus_data/ranspw_cpu_limit_reduction.csv"
memory_path_rp = "../../results/prometheus_data/randpw_memory_limit_reduction.csv"
both_path_rp = "../../results/prometheus_data/randpw_both_limits_reduction.csv"

# Import datasets
df_cpu_rp = pd.read_csv(cpu_path_rp)
df_memory_rp = pd.read_csv(memory_path_rp)
df_both_rp = pd.read_csv(both_path_rp)

# Combine all three DataFrames
df_all_rp = pd.concat([df_cpu_rp, df_memory_rp, df_both_rp], ignore_index=True)

configs = {
    "Service 1": df_all_s1,
    "Service 2": df_all_s2,
    "HashGen": df_all_hg,
    "RandPw": df_all_rp,
}

test_sizes = [0.3, 0.2, 0.1]

Index(['Timestamp', 'Service', 'CPU Request', 'Memory Request', 'CPU Limit',
       'Memory Limit', 'Latency', 'CPU Usage', 'Memory Usage'],
      dtype='object')


In [25]:
def train_xgboost_model(df, feature_col, test_size=0.2, plot=True):
    
    df = df.sort_values("Timestamp")
    df = df[[feature_col, "CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency"]].dropna()

    features = ["CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency"]
    target = feature_col

    # Normalize memory values if needed
    if "Memory" in feature_col:
        df[feature_col] = df[feature_col] / (1024 * 1024)

    # Scale features
    feature_scaler = MinMaxScaler()
    X_scaled = feature_scaler.fit_transform(df[features])

    # Scale target
    target_scaler = MinMaxScaler()
    y_scaled = target_scaler.fit_transform(df[[target]]).ravel()

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=test_size, shuffle=False)

    # Model training
    model = XGBRegressor(random_state=42)
    model.fit(X_train, y_train)

    # Predictions
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    # Inverse scaling
    pred_train_inv = target_scaler.inverse_transform(pred_train.reshape(-1, 1)).ravel()
    y_train_inv = target_scaler.inverse_transform(y_train.reshape(-1, 1)).ravel()
    pred_test_inv = target_scaler.inverse_transform(pred_test.reshape(-1, 1)).ravel()
    y_test_inv = target_scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()

    # Evaluation
    train_rmse = np.sqrt(mean_squared_error(y_train_inv, pred_train_inv))
    test_rmse = np.sqrt(mean_squared_error(y_test_inv, pred_test_inv))
    train_r2 = r2_score(y_train_inv, pred_train_inv)
    test_r2 = r2_score(y_test_inv, pred_test_inv)

    print(f"{feature_col} - Train RMSE: {train_rmse:.4f}, R²: {train_r2:.4f}")
    print(f"{feature_col} - Test  RMSE: {test_rmse:.4f}, R²: {test_r2:.4f}")

    # if plot:
    #     plt.figure(figsize=(10, 4))
    #     plt.plot(y_test_inv, label="Actual")
    #     plt.plot(pred_test_inv, label="Predicted")
    #     plt.title(f"{feature_col} Prediction (XGBoost)")
    #     plt.legend()
    #     plt.show()

    return model


In [26]:
for name, df in configs.items():
    for test_size in test_sizes:
        print(f"Training XGBoost for {name} - CPU Usage with test size {test_size}")
        model_cpu = train_xgboost_model(df, "CPU Usage", test_size)

        print(f"Training XGBoost for {name} - Memory Usage with test size {test_size}")
        model_mem = train_xgboost_model(df, "Memory Usage", test_size)
        print()


Training XGBoost for Service 1 - CPU Usage with test size 0.3
CPU Usage - Train RMSE: 0.0065, R²: 0.9505
CPU Usage - Test  RMSE: 0.0244, R²: 0.1778
Training XGBoost for Service 1 - Memory Usage with test size 0.3
Memory Usage - Train RMSE: 5.0943, R²: 0.7758
Memory Usage - Test  RMSE: 10.2621, R²: -1.4573

Training XGBoost for Service 1 - CPU Usage with test size 0.2
CPU Usage - Train RMSE: 0.0063, R²: 0.9482
CPU Usage - Test  RMSE: 0.0263, R²: -1.0619
Training XGBoost for Service 1 - Memory Usage with test size 0.2
Memory Usage - Train RMSE: 5.2501, R²: 0.7332
Memory Usage - Test  RMSE: 9.6428, R²: -0.6867

Training XGBoost for Service 1 - CPU Usage with test size 0.1
CPU Usage - Train RMSE: 0.0060, R²: 0.9556
CPU Usage - Test  RMSE: 0.0137, R²: -1.3366
Training XGBoost for Service 1 - Memory Usage with test size 0.1
Memory Usage - Train RMSE: 5.1897, R²: 0.7165
Memory Usage - Test  RMSE: 10.2037, R²: -0.1057

Training XGBoost for Service 2 - CPU Usage with test size 0.3
CPU Usage - T

In [None]:
def train_xgboost_model_optuna(df, feature_col, test_size=0.2, plot=True):
    df = df.sort_values("Timestamp")
    df = df[[feature_col, "CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency"]].dropna()

    features = ["CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency"]
    target = feature_col

    if "Memory" in feature_col:
        df[feature_col] = df[feature_col] / (1024 * 1024)

    feature_scaler = MinMaxScaler()
    X_scaled = feature_scaler.fit_transform(df[features])

    target_scaler = MinMaxScaler()
    y_scaled = target_scaler.fit_transform(df[[target]]).ravel()

    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=test_size, shuffle=False)

    def objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 300),
            "max_depth": trial.suggest_int("max_depth", 3, 12),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.6, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
            "gamma": trial.suggest_float("gamma", 0, 5),
            "reg_alpha": trial.suggest_float("reg_alpha", 0, 5),
            "reg_lambda": trial.suggest_float("reg_lambda", 0, 5),
            "random_state": 42,
        }

        model = XGBRegressor(**params)
        model.fit(X_train, y_train, verbose=False)
        preds = model.predict(X_test)
        rmse = mean_squared_error(y_test, preds)
        return rmse

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=50, show_progress_bar=False)

    best_params = study.best_params
    best_params["random_state"] = 42
    model = XGBRegressor(**best_params)
    model.fit(X_train, y_train)

    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    pred_train_inv = target_scaler.inverse_transform(pred_train.reshape(-1, 1)).ravel()
    y_train_inv = target_scaler.inverse_transform(y_train.reshape(-1, 1)).ravel()
    pred_test_inv = target_scaler.inverse_transform(pred_test.reshape(-1, 1)).ravel()
    y_test_inv = target_scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()

    train_rmse = np.sqrt(mean_squared_error(y_train_inv, pred_train_inv))
    test_rmse = np.sqrt(mean_squared_error(y_test_inv, pred_test_inv))
    train_r2 = r2_score(y_train_inv, pred_train_inv)
    test_r2 = r2_score(y_test_inv, pred_test_inv)

    print(f"{feature_col} - Train RMSE: {train_rmse:.4f}, R²: {train_r2:.4f}")
    print(f"{feature_col} - Test  RMSE: {test_rmse:.4f}, R²: {test_r2:.4f}")
    print("Best Hyperparameters:", best_params)

    # if plot:
    #     plt.figure(figsize=(10, 4))
    #     plt.plot(y_test_inv, label="Actual")
    #     plt.plot(pred_test_inv, label="Predicted")
    #     plt.title(f"{feature_col} Prediction (XGBoost + Optuna)")
    #     plt.legend()
    #     plt.show()

    return model


In [69]:
for name, df in configs.items():
    for test_size in test_sizes:
        print(f"Training XGBoost for {name} - CPU Usage with test size {test_size}")
        model_cpu = train_xgboost_model_optuna(df, "CPU Usage", test_size)

        print(f"Training XGBoost for {name} - Memory Usage with test size {test_size}")
        model_mem = train_xgboost_model_optuna(df, "Memory Usage", test_size)
        print()


Training XGBoost for Service 1 - CPU Usage with test size 0.3
CPU Usage - Train RMSE: 0.0081, R²: 0.9217
CPU Usage - Test  RMSE: 0.0151, R²: 0.6834
Best Hyperparameters: {'n_estimators': 230, 'max_depth': 11, 'learning_rate': 0.2099892620425105, 'subsample': 0.8071373947971165, 'colsample_bytree': 0.7038104119110115, 'gamma': 0.001619005658308222, 'reg_alpha': 2.4619132789049236, 'reg_lambda': 0.949408169569826, 'random_state': 42}
Training XGBoost for Service 1 - Memory Usage with test size 0.3
Memory Usage - Train RMSE: 9.9721, R²: 0.1409
Memory Usage - Test  RMSE: 6.7263, R²: -0.0557
Best Hyperparameters: {'n_estimators': 164, 'max_depth': 6, 'learning_rate': 0.20351355045572472, 'subsample': 0.8064184220485027, 'colsample_bytree': 0.9123269442125039, 'gamma': 0.537604565100349, 'reg_alpha': 0.3565679717555251, 'reg_lambda': 2.880685799333218, 'random_state': 42}

Training XGBoost for Service 1 - CPU Usage with test size 0.2
CPU Usage - Train RMSE: 0.0073, R²: 0.9291
CPU Usage - Tes

In [70]:
def add_rolling_features(df, window=3):
    df = df.copy()
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], format='mixed')

    df = df.sort_values(['Service', 'Timestamp'])  # Service-wise time sorting
    df.set_index('Timestamp', inplace=True)

    # Rolling averages per service
    for col in ['CPU Usage', 'Memory Usage', 'Latency']:
        df[f'{col}_RollingMean'] = df.groupby('Service')[col].transform(lambda x: x.rolling(window, min_periods=1).mean())
        df[f'{col}_RollingSTD'] = df.groupby('Service')[col].transform(lambda x: x.rolling(window, min_periods=1).std())

    # Spike detection
    df["CPU_Spike"] = df["CPU Usage"] - df["CPU Usage_RollingMean"]
    df["Memory_Spike"] = df["Memory Usage"] - df["Memory Usage_RollingMean"]

    # Latency trend direction
    df["Latency_Trend"] = df.groupby("Service")["Latency"].transform(lambda x: x.diff().fillna(0).apply(lambda y: 1 if y > 0 else (-1 if y < 0 else 0)))

    df.reset_index(inplace=True)  # Reset index to include Timestamp again
    df.dropna(inplace=True)  # Optional: drop rows with NaNs from rolling
    return df

In [75]:
def train_xgboost_model_new_features(df, feature_col, test_size=0.2, plot=True):
    df = add_rolling_features(df)
    df = df.sort_values("Timestamp")
    df = df[[feature_col, "CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency",
    "CPU Usage_RollingMean", "Memory Usage_RollingMean", "Latency_RollingMean",
    "CPU Usage_RollingSTD", "Memory Usage_RollingSTD", "Latency_RollingSTD",
    "CPU_Spike", "Memory_Spike", "Latency_Trend"]].dropna()

    features = [
    "CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency",
    "CPU Usage_RollingMean", "Memory Usage_RollingMean", "Latency_RollingMean",
    "CPU Usage_RollingSTD", "Memory Usage_RollingSTD", "Latency_RollingSTD",
    "CPU_Spike", "Memory_Spike", "Latency_Trend"
    ]

    target = feature_col

    if "Memory" in feature_col:
        df[feature_col] = df[feature_col] / (1024 * 1024)

    feature_scaler = MinMaxScaler()
    X_scaled = feature_scaler.fit_transform(df[features])

    target_scaler = MinMaxScaler()
    y_scaled = target_scaler.fit_transform(df[[target]]).ravel()

    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=test_size, shuffle=False)

    def objective(trial):
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 100, 300),
            "max_depth": trial.suggest_int("max_depth", 3, 12),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "subsample": trial.suggest_float("subsample", 0.6, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
            "gamma": trial.suggest_float("gamma", 0, 5),
            "reg_alpha": trial.suggest_float("reg_alpha", 0, 5),
            "reg_lambda": trial.suggest_float("reg_lambda", 0, 5),
            "random_state": 42,
        }

        model = XGBRegressor(**params)
        model.fit(X_train, y_train, verbose=False)
        preds = model.predict(X_test)
        rmse = mean_squared_error(y_test, preds)
        return rmse

    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=50, show_progress_bar=False)

    best_params = study.best_params
    best_params["random_state"] = 42
    model = XGBRegressor(**best_params)
    model.fit(X_train, y_train)

    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    pred_train_inv = target_scaler.inverse_transform(pred_train.reshape(-1, 1)).ravel()
    y_train_inv = target_scaler.inverse_transform(y_train.reshape(-1, 1)).ravel()
    pred_test_inv = target_scaler.inverse_transform(pred_test.reshape(-1, 1)).ravel()
    y_test_inv = target_scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()

    train_rmse = np.sqrt(mean_squared_error(y_train_inv, pred_train_inv))
    test_rmse = np.sqrt(mean_squared_error(y_test_inv, pred_test_inv))
    train_r2 = r2_score(y_train_inv, pred_train_inv)
    test_r2 = r2_score(y_test_inv, pred_test_inv)

    print(f"{feature_col} - Train RMSE: {train_rmse:.4f}, R²: {train_r2:.4f}")
    print(f"{feature_col} - Test  RMSE: {test_rmse:.4f}, R²: {test_r2:.4f}")
    print("Best Hyperparameters:", best_params)

    # if plot:
    #     plt.figure(figsize=(10, 4))
    #     plt.plot(y_test_inv, label="Actual")
    #     plt.plot(pred_test_inv, label="Predicted")
    #     plt.title(f"{feature_col} Prediction (XGBoost + Optuna)")
    #     plt.legend()
    #     plt.show()

    return model


In [76]:
for name, df in configs.items():
    for test_size in test_sizes:
        print(f"Training XGBoost for {name} - CPU Usage with test size {test_size}")
        model_cpu = train_xgboost_model_new_features(df, "CPU Usage", test_size)

        print(f"Training XGBoost for {name} - Memory Usage with test size {test_size}")
        model_mem = train_xgboost_model_new_features(df, "Memory Usage", test_size)
        print()


Training XGBoost for Service 1 - CPU Usage with test size 0.3
CPU Usage - Train RMSE: 0.0013, R²: 0.9981
CPU Usage - Test  RMSE: 0.0014, R²: 0.9973
Best Hyperparameters: {'n_estimators': 185, 'max_depth': 5, 'learning_rate': 0.29058368439756266, 'subsample': 0.9556875816167616, 'colsample_bytree': 0.9119056367951991, 'gamma': 0.015652571329133252, 'reg_alpha': 0.4480593787950613, 'reg_lambda': 1.6482541254779657, 'random_state': 42}
Training XGBoost for Service 1 - Memory Usage with test size 0.3
Memory Usage - Train RMSE: 5.0474, R²: 0.7798
Memory Usage - Test  RMSE: 5.4257, R²: 0.3131
Best Hyperparameters: {'n_estimators': 121, 'max_depth': 8, 'learning_rate': 0.16268645355411424, 'subsample': 0.6201741512013198, 'colsample_bytree': 0.9798480298638017, 'gamma': 0.000370898367562187, 'reg_alpha': 4.4275992521611585, 'reg_lambda': 2.9615841131031115, 'random_state': 42}

Training XGBoost for Service 1 - CPU Usage with test size 0.2
CPU Usage - Train RMSE: 0.0028, R²: 0.9896
CPU Usage -