In [22]:
import pandas as pd
import numpy as np
from ngboost import NGBRegressor
from ngboost.distns import Normal
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [23]:
# Paths to service 1 datasets
cpu_path_s1 = "../../results/prometheus_data/service1_cpu_limit_reduction.csv"
memory_path_s1 = "../../results/prometheus_data/new datasets/service1_memory_limit_reduction.csv"
both_path_s1 = "../../results/prometheus_data/service1_both_limits_reduction.csv"

# Import datasets
df_cpu_s1 = pd.read_csv(cpu_path_s1)
df_memory_s1 = pd.read_csv(memory_path_s1)
df_both_s1 = pd.read_csv(both_path_s1)

df_all_s1 = pd.concat([df_cpu_s1, df_memory_s1, df_both_s1], ignore_index=True)
print(df_all_s1.columns)

# Paths to service 2 datasets
cpu_path_s2 = "../../results/prometheus_data/service2_cpu_limit_reduction.csv"
memory_path_s2 = "../../results/prometheus_data/service2_memory_limit_reduction.csv"
both_path_s2 = "../../results/prometheus_data/service2_both_limit_reduction.csv"

# Import datasets
df_cpu_s2 = pd.read_csv(cpu_path_s2)
df_memory_s2 = pd.read_csv(memory_path_s2)
df_both_s2 = pd.read_csv(both_path_s2)

# Combine all three DataFrames
df_all_s2 = pd.concat([df_cpu_s2, df_memory_s2, df_both_s2], ignore_index=True)

# Paths to datasets
cpu_path_hg = "../../results/prometheus_data/hashgen_cpu_limit_reduction.csv"
memory_path_hg = "../../results/prometheus_data/hashgen_memory_limit_reduction.csv"
both_path_hg = "../../results/prometheus_data/hashgen_both_limit_reduction.csv"

# Import datasets
df_cpu_hg = pd.read_csv(cpu_path_hg)
df_memory_hg = pd.read_csv(memory_path_hg)
df_both_hg = pd.read_csv(both_path_hg)

# Combine all three DataFrames
df_all_hg = pd.concat([df_cpu_hg, df_memory_hg, df_both_hg], ignore_index=True)

# Paths to datasets
cpu_path_rp = "../../results/prometheus_data/ranspw_cpu_limit_reduction.csv"
memory_path_rp = "../../results/prometheus_data/randpw_memory_limit_reduction.csv"
both_path_rp = "../../results/prometheus_data/randpw_both_limits_reduction.csv"

# Import datasets
df_cpu_rp = pd.read_csv(cpu_path_rp)
df_memory_rp = pd.read_csv(memory_path_rp)
df_both_rp = pd.read_csv(both_path_rp)

# Combine all three DataFrames
df_all_rp = pd.concat([df_cpu_rp, df_memory_rp, df_both_rp], ignore_index=True)

Index(['Timestamp', 'Service', 'CPU Request', 'Memory Request', 'CPU Limit',
       'Memory Limit', 'Latency', 'CPU Usage', 'Memory Usage'],
      dtype='object')


In [24]:
configs = {
    "Service 1": df_all_s1,
    "Service 2": df_all_s2,
    "HashGen": df_all_hg,
    "RandPw": df_all_rp,
}

test_sizes = [0.3, 0.2, 0.1]

In [27]:
def train_ngboost_model(df, feature_col, test_size=0.2, plot=True):
    df = df.sort_values("Timestamp")
    df = df[[feature_col, "CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency"]].dropna()

    features = ["CPU Request", "Memory Request", "CPU Limit", "Memory Limit", "Latency"]
    target = feature_col

    if "Memory" in feature_col:
        df[feature_col] = df[feature_col] / (1024 * 1024)

    # Scale features
    feature_scaler = MinMaxScaler()
    X_scaled = feature_scaler.fit_transform(df[features])

    # Scale target
    target_scaler = MinMaxScaler()
    y_scaled = target_scaler.fit_transform(df[[target]]).ravel()

    # Split
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=test_size, shuffle=False)

    model = NGBRegressor(Dist=Normal, verbose=False)
    model.fit(X_train, y_train)

    # Predictions
    pred_train = model.predict(X_train)
    pred_test = model.predict(X_test)

    # Inverse transform predictions
    pred_train_inv = target_scaler.inverse_transform(pred_train.reshape(-1, 1)).ravel()
    y_train_inv = target_scaler.inverse_transform(y_train.reshape(-1, 1)).ravel()
    pred_test_inv = target_scaler.inverse_transform(pred_test.reshape(-1, 1)).ravel()
    y_test_inv = target_scaler.inverse_transform(y_test.reshape(-1, 1)).ravel()

    # Evaluation
    train_rmse = np.sqrt(mean_squared_error(y_train_inv, pred_train_inv))
    test_rmse = np.sqrt(mean_squared_error(y_test_inv, pred_test_inv))
    train_r2 = r2_score(y_train_inv, pred_train_inv)
    test_r2 = r2_score(y_test_inv, pred_test_inv)

    print(f"{feature_col} - Train RMSE: {train_rmse:.4f}, R²: {train_r2:.4f}")
    print(f"{feature_col} - Test  RMSE: {test_rmse:.4f}, R²: {test_r2:.4f}")

    # if plot:
    #     plt.figure(figsize=(10, 4))
    #     plt.plot(y_test_inv, label="Actual")
    #     plt.plot(pred_test_inv, label="Predicted")
    #     plt.title(f"{feature_col} Prediction (NGBoost)")
    #     plt.legend()
    #     plt.show()

    return model


In [28]:
for name, df in configs.items():
    for test_size in test_sizes:
        print(f"Training NGBoost for {name} - CPU Usage with test size {test_size}")
        model_cpu = train_ngboost_model(df, "CPU Usage", test_size)

        print(f"Training NGBoost for {name} - Memory Usage with test size {test_size}")
        model_mem = train_ngboost_model(df, "Memory Usage", test_size)
        print()


Training NGBoost for Service 1 - CPU Usage with test size 0.3
CPU Usage - Train RMSE: 0.0074, R²: 0.9347
CPU Usage - Test  RMSE: 0.0233, R²: 0.2478
Training NGBoost for Service 1 - Memory Usage with test size 0.3


  self.var = self.scale**2
  self.var = self.scale**2


Memory Usage - Train RMSE: 6.5573, R²: 0.6285
Memory Usage - Test  RMSE: 6.9937, R²: -0.1413

Training NGBoost for Service 1 - CPU Usage with test size 0.2
CPU Usage - Train RMSE: 0.0073, R²: 0.9294
CPU Usage - Test  RMSE: 0.0203, R²: -0.2257
Training NGBoost for Service 1 - Memory Usage with test size 0.2


  self.var = self.scale**2
  self.var = self.scale**2
  self.var = self.scale**2


Memory Usage - Train RMSE: 6.1864, R²: 0.6295
Memory Usage - Test  RMSE: 8.3760, R²: -0.2726

Training NGBoost for Service 1 - CPU Usage with test size 0.1
CPU Usage - Train RMSE: 0.0071, R²: 0.9393
CPU Usage - Test  RMSE: 0.0142, R²: -1.5002
Training NGBoost for Service 1 - Memory Usage with test size 0.1


  self.var = self.scale**2
  self.var = self.scale**2
  self.var = self.scale**2


Memory Usage - Train RMSE: 6.2379, R²: 0.5903
Memory Usage - Test  RMSE: 10.0890, R²: -0.0810

Training NGBoost for Service 2 - CPU Usage with test size 0.3
CPU Usage - Train RMSE: 0.0030, R²: 0.9893
CPU Usage - Test  RMSE: 0.0070, R²: 0.9030
Training NGBoost for Service 2 - Memory Usage with test size 0.3
Memory Usage - Train RMSE: 0.2227, R²: 0.3317
Memory Usage - Test  RMSE: 0.2785, R²: -0.2519

Training NGBoost for Service 2 - CPU Usage with test size 0.2
CPU Usage - Train RMSE: 0.0031, R²: 0.9875
CPU Usage - Test  RMSE: 0.0067, R²: 0.8106
Training NGBoost for Service 2 - Memory Usage with test size 0.2
Memory Usage - Train RMSE: 0.2256, R²: 0.3234
Memory Usage - Test  RMSE: 0.2562, R²: -0.0953

Training NGBoost for Service 2 - CPU Usage with test size 0.1
CPU Usage - Train RMSE: 0.0033, R²: 0.9855
CPU Usage - Test  RMSE: 0.0066, R²: 0.2632
Training NGBoost for Service 2 - Memory Usage with test size 0.1
Memory Usage - Train RMSE: 0.2289, R²: 0.3021
Memory Usage - Test  RMSE: 0.217