In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor

# Power plot

In [None]:
# Rebuild the total power from the per-component readings plus the GPU reading
# and keep it next to the measured 'power' column for the scatter plot below.
x_train_df = pd.read_csv('X_train_gpus.csv')
y_train_df = pd.read_csv('Y_train_gpus.csv')

# Grab the efficiency column now: it is dropped from the frame further down.
eff = x_train_df['psu_eff']

# Every component reading is divided by `eff` before being summed.
# NOTE(review): presumably `psu_eff` is the PSU efficiency and the division
# refers the readings to the wall side -- confirm.
# NOTE(review): 'nvme_power' is rescaled here but dropped right below, so it
# never contributes to the sum -- confirm that excluding it is intended.
cols = ['ram_power', 'cpu_power', 'nvme_power', 'nic_power', 'storage_power']
x_train_df[cols] = x_train_df[cols].div(eff, axis=0)
x_train_df = x_train_df.drop(columns=['timestamp', 'ac_frequency', 'current', 'nvme_power',
                                     'energy', 'linkquality', 'state', 'voltage', 'psu_eff'])

true_power = x_train_df['power']

# Select by column name instead of by position so the result does not depend on
# the column order of the CSV files: everything except 'power' is a component,
# and the target is the first column of Y that is not the timestamp.
component_power = x_train_df.drop(columns='power').sum(axis=1)
gpu_power = y_train_df.drop(columns='timestamp').iloc[:, 0]
pred_power = component_power + gpu_power / eff

In [None]:
# Measured vs. reconstructed power, with the y = x line as the ideal fit.
# Both axes share the same range so the diagonal is meaningful.
xmin, xmax = (180, 450)

plt.scatter(true_power, pred_power, color='blue', s=1)
plt.xlabel('Power [W]\n Smartplug read')
plt.ylabel('Power [W]\n Powermeter evaluation')
plt.xlim((xmin, xmax))
plt.ylim((xmin, xmax))

# Reference diagonal: a perfect reconstruction would put every point on it.
diagonal = np.linspace(xmin, xmax, 500)
plt.plot(diagonal, diagonal, 'k--', alpha=0.8)

# The grid only needs to be enabled once.
plt.grid(True)
plt.show()

# Baseline Performance

In [None]:
# Baseline estimate of the GPU power: whatever is left of the measured 'power'
# once the rescaled component readings have been subtracted.
x_train_df = pd.read_csv('X_train_gpus.csv')
y_train_df = pd.read_csv('Y_train_gpus.csv')

# Grab the efficiency column now: it is dropped from the frame further down.
eff = x_train_df['psu_eff']
# NOTE(review): 'nvme_power' is rescaled here but dropped right below, so it is
# not subtracted from 'power' -- confirm that excluding it is intended.
cols = ['ram_power', 'cpu_power', 'nvme_power', 'nic_power', 'storage_power']
x_train_df[cols] = x_train_df[cols].div(eff, axis=0)
x_train_df = x_train_df.drop(columns=['timestamp', 'ac_frequency', 'current', 'nvme_power',
                                     'energy', 'linkquality', 'state', 'voltage', 'psu_eff'])

# Select by name rather than by position so the CSV column order is irrelevant.
gpu_power = y_train_df.drop(columns='timestamp').iloc[:, 0]
component_power = x_train_df.drop(columns='power').sum(axis=1)

# The power-plot cell models total power as sum(component / eff) + gpu / eff,
# so the residual below estimates gpu / eff. It is multiplied back by `eff` so
# that prediction and target are expressed on the same side of the PSU.
residual = x_train_df['power'] - component_power
pred = residual * eff

base_err = (pred - gpu_power).abs()
# Relative error as a fraction of the GPU reading (not a percentage).
base_err_rel = base_err / gpu_power

In [None]:
# Print mean and standard deviation of the absolute and the relative baseline
# error, one line per quantity.
for label, err in (("Baseline error [W]", base_err),
                   ("Baseline relative error", base_err_rel)):
    print(f"{label} is -> mean: {err.mean():.2f}  std: {err.std():.2f}")

# Models 

In [None]:
# Linear baselines: tune each model with 5-fold CV on the training split, then
# score the refitted best estimator on the held-out 20 %.
x_train_df = pd.read_csv('X_train_gpus.csv')
y_train_df = pd.read_csv('Y_train_gpus.csv')

y = y_train_df.drop(columns='timestamp')
x = x_train_df.drop(columns='timestamp')

x = np.array(x)
# Flatten the target to 1-D, as the tree-model cells do.
# Assumes a single target column remains once 'timestamp' is dropped -- confirm.
y = np.array(y).ravel()

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# NOTE(review): the scaler is fitted on the whole training split before the
# grid search, so every CV fold is scaled with statistics that include its own
# validation part. Wrapping scaler + model in a Pipeline would avoid that.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

models = {
    "LinearRegression": LinearRegression(),
    "Ridge": Ridge(),  # L2 regularizer
    "Lasso": Lasso(),  # L1 regularizer
    "ElasticNet": ElasticNet()  # L2 and L1 regularizers
}

param_grid = {
    "LinearRegression": {},
    "Ridge": {"alpha": [0.01, 0.1, 1, 10, 100]},
    "Lasso": {"alpha": [0.01, 0.1, 1, 10, 100]},
    "ElasticNet": {"alpha": [0.01, 0.1, 1, 10], "l1_ratio": [0.1, 0.5, 0.9]}
}

best_models = {}

for name, model in models.items():
    grid = GridSearchCV(model, param_grid[name], cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
    grid.fit(X_train, y_train)
    best_models[name] = grid.best_estimator_

    # This score is computed on the training split with the refitted best
    # estimator; it is NOT a cross-validated figure, so it is labelled as such.
    y_train_pred = best_models[name].predict(X_train)
    train_mpe = mean_absolute_percentage_error(y_train, y_train_pred)

    print(f"Best params for {name}: {grid.best_params_}")
    # best_score_ is the negated MAE because of the 'neg_' scorer.
    print(f"Best MAE for {name} (cross-val): {-grid.best_score_}")
    # mean_absolute_percentage_error returns a fraction; ':.2%' converts it to
    # a real percentage instead of appending '%' to the raw fraction.
    print(f"Train MAPE for {name}: {train_mpe:.2%}\n")

for name, model in best_models.items():
    y_pred = model.predict(X_test)
    test_mae = mean_absolute_error(y_test, y_pred)
    test_mpe = mean_absolute_percentage_error(y_test, y_pred)

    print(f"Test MAE for {name}: {test_mae}")
    print(f"Test MAPE for {name}: {test_mpe:.2%}\n")

In [None]:
# Random forest: tune with 5-fold CV on the training split, then score the
# refitted best estimator on the held-out 20 %.
x_train_df = pd.read_csv('X_train_gpus.csv')
y_train_df = pd.read_csv('Y_train_gpus.csv')

y = y_train_df.drop(columns='timestamp')
x = x_train_df.drop(columns='timestamp')

x = np.array(x)
y = np.array(y).ravel()

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


def mpe(y_true, y_pred):
    """Return the mean absolute percentage error as a fraction (0.05 == 5 %).

    Delegates to scikit-learn's metric, the same one the linear-models cell
    uses, so the figures are comparable across cells and the denominator is
    the absolute value of the target.
    """
    return mean_absolute_percentage_error(y_true, y_pred)


models = {
    "RandomForest": RandomForestRegressor(random_state=42),
}

param_grid = {
    "RandomForest": {
        "n_estimators": [50, 100, 200],
        "max_depth": [5, 10, None],
        "min_samples_split": [2, 5, 10]
    }
}

best_models = {}

for name, model in models.items():
    print(f"--- {name} ---")
    grid = GridSearchCV(model, param_grid[name], cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
    grid.fit(X_train, y_train)
    best_models[name] = grid.best_estimator_

    # This score is computed on the training split with the refitted best
    # estimator; it is NOT a cross-validated figure, so it is labelled as such.
    y_train_pred = best_models[name].predict(X_train)
    train_mpe = mpe(y_train, y_train_pred)

    print(f"Best params: {grid.best_params_}")
    # best_score_ is the negated MAE because of the 'neg_' scorer.
    print(f"Best MAE (cross-val): {-grid.best_score_}")
    # `mpe` returns a fraction; ':.2%' turns it into a real percentage.
    print(f"Train MAPE: {train_mpe:.2%}\n")

for name, model in best_models.items():
    print(f"--- {name} (Test) ---")
    y_pred = model.predict(X_test)
    test_mae = mean_absolute_error(y_test, y_pred)
    test_mpe = mpe(y_test, y_pred)

    print(f"Test MAE: {test_mae}")
    print(f"Test MAPE: {test_mpe:.2%}\n")

In [None]:
# XGBoost: tune with 5-fold CV on the training split, then score the refitted
# best estimator on the held-out 20 %.
x_train_df = pd.read_csv('X_train_gpus.csv')
y_train_df = pd.read_csv('Y_train_gpus.csv')

y = y_train_df.drop(columns='timestamp')
x = x_train_df.drop(columns='timestamp')

x = np.array(x)
y = np.array(y).ravel()

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


def mpe(y_true, y_pred):
    """Return the mean absolute percentage error as a fraction (0.05 == 5 %).

    Delegates to scikit-learn's metric, the same one the linear-models cell
    uses, so the figures are comparable across cells and the denominator is
    the absolute value of the target.
    """
    return mean_absolute_percentage_error(y_true, y_pred)


models = {
    "XGBoost": xgb.XGBRegressor(tree_method='hist', device='cuda', objective='reg:absoluteerror', random_state=42)
}

param_grid = {
    "XGBoost": {
        "n_estimators": [50, 100, 200],
        "max_depth": [3, 6, 9],
        "learning_rate": [0.01, 0.1, 0.2],
        "reg_alpha": [0, 0.1, 1],
        "reg_lambda": [1, 10]
    }
}

best_models = {}

for name, model in models.items():
    print(f"--- {name} ---")
    # NOTE(review): n_jobs=-1 runs the fits in parallel while the estimator is
    # configured with device='cuda'; the parallel fits may contend for the same
    # GPU. Consider n_jobs=1 here -- confirm on the target hardware.
    grid = GridSearchCV(model, param_grid[name], cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
    grid.fit(X_train, y_train)
    best_models[name] = grid.best_estimator_

    # This score is computed on the training split with the refitted best
    # estimator; it is NOT a cross-validated figure, so it is labelled as such.
    y_train_pred = best_models[name].predict(X_train)
    train_mpe = mpe(y_train, y_train_pred)

    print(f"Best params: {grid.best_params_}")
    # best_score_ is the negated MAE because of the 'neg_' scorer.
    print(f"Best MAE (cross-val): {-grid.best_score_}")
    # `mpe` returns a fraction; ':.2%' turns it into a real percentage.
    print(f"Train MAPE: {train_mpe:.2%}\n")

for name, model in best_models.items():
    print(f"--- {name} (Test) ---")
    y_pred = model.predict(X_test)
    test_mae = mean_absolute_error(y_test, y_pred)
    test_mpe = mpe(y_test, y_pred)

    print(f"Test MAE: {test_mae}")
    print(f"Test MAPE: {test_mpe:.2%}\n")