In [279]:
import pandas as pd
import numpy as np
import math
from scipy.stats import norm
import datetime
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

In [280]:
def find_upper_bound(mu, std, alpha=0.95):
    z_value = norm.ppf(alpha)
    return mu + std * z_value

In [281]:
gpu_conv_value = 0.00021648585046
gpu_no_norm_conv_value = 6958.933333333333333
per_unit_energy_cons = 1
target_qos = ["90", "91", "92", "93", "94", "95", "96", "97", "98", "99"]

In [282]:
original_dataset = pd.read_csv("../saved_data/ali20/ali20_g.csv")
scaler = MinMaxScaler()
factor=0.8
scaler.fit(original_dataset.avggpu.iloc[:int(original_dataset.shape[0]*factor)].values.reshape(-1,1))
original_dataset.describe()

Unnamed: 0.1,Unnamed: 0,time,avggpu,avggpumem
count,14358.0,14358.0,14358.0,14358.0
mean,8928.5,1596240000.0,20624310.0,3858848.0
std,4144.941918,1243483.0,4407054.0,940940.6
min,1750.0,1594087000.0,6682396.0,1223004.0
25%,5339.25,1595163000.0,17808580.0,3268815.0
50%,8928.5,1596240000.0,20882870.0,3882400.0
75%,12517.75,1597317000.0,23577880.0,4509566.0
max,16107.0,1598394000.0,33683920.0,6902691.0


In [283]:
hbnn_results = pd.read_csv("output_HBNN-ali20_g-gpu-w288-h2.csv")
hbnn_results.drop(labels=["Unnamed: 0"], inplace=True, axis=1)
hbnn_results["model"] = ["HBNN" for i in range(len(hbnn_results))]
hbnn_results.rename(columns={"avggpu": "pred_norm_gpu", "std": "pred_std", "labels": "true_norm_gpu"}, inplace=True)
first_column = hbnn_results.pop('model')
hbnn_results.insert(0, 'model', first_column)
hbnn_results["true_gpu"] = original_dataset["avggpu"].values[-2582:]
hbnn_results["true_n_gpu"] = hbnn_results.apply(lambda row: math.ceil(row["true_gpu"]/gpu_no_norm_conv_value), axis=1)
# hbnn_results.drop(columns=['true_norm_gpu'], inplace=True)
hbnn_results["pred_gpu"] = scaler.inverse_transform(hbnn_results["pred_norm_gpu"].values.reshape(-1,1))
for target in target_qos:
    alpa = float(int(target)/100)
    hbnn_results[f"ub_{target}_norm"] = hbnn_results.apply(lambda row: find_upper_bound(row["pred_norm_gpu"], row["pred_std"], alpha=alpa), axis=1)
    hbnn_results[f"ub_{target}"] = scaler.inverse_transform(hbnn_results[f"ub_{target}_norm"].values.reshape(-1,1))
    hbnn_results[f"pred_n_gpu_{target}"] = hbnn_results.apply(lambda row: math.ceil(row[f"ub_{target}"]/gpu_no_norm_conv_value), axis=1)
hbnn_results.to_csv("scenarios/hbnn_results.csv")
hbnn_results.describe()

Unnamed: 0,pred_norm_gpu,pred_std,true_norm_gpu,true_gpu,true_n_gpu,pred_gpu,ub_90_norm,ub_90,pred_n_gpu_90,ub_91_norm,...,pred_n_gpu_96,ub_97_norm,ub_97,pred_n_gpu_97,ub_98_norm,ub_98,pred_n_gpu_98,ub_99_norm,ub_99,pred_n_gpu_99
count,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,...,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0
mean,0.35742,0.049786,0.328949,18227520.0,2619.806352,18333240.0,0.421223,19704640.0,2832.052672,0.424171,...,2904.207204,0.451057,20345900.0,2924.202556,0.459668,20530980.0,2950.803641,0.473239,20822690.0,2992.72928
std,0.187234,0.007063,0.23252,5355664.0,769.605537,4024433.0,0.188831,4058763.0,583.245436,0.188915,...,585.367004,0.189721,4077902.0,586.005779,0.189995,4083786.0,586.841574,0.190441,4093385.0,588.223474
min,0.109442,0.016079,-0.172291,6682396.0,961.0,13003160.0,0.167206,14244740.0,2047.0,0.169874,...,2113.0,0.194216,14825300.0,2131.0,0.202011,14992860.0,2155.0,0.214298,15256960.0,2193.0
25%,0.143482,0.04626,0.106536,13104650.0,1884.0,13734810.0,0.21061,15177680.0,2181.5,0.213763,...,2260.0,0.242669,15866750.0,2280.5,0.251706,16061000.0,2308.25,0.265869,16365420.0,2352.0
50%,0.372455,0.04921,0.36254,19001220.0,2731.0,18656420.0,0.433462,19967700.0,2869.5,0.436478,...,2933.0,0.460769,20554650.0,2954.5,0.468404,20718750.0,2977.5,0.481697,21004480.0,3019.0
75%,0.516378,0.05296,0.51967,22620420.0,3251.0,21749920.0,0.581171,23142600.0,3325.75,0.58413,...,3398.0,0.611112,23786140.0,3419.0,0.619926,23975600.0,3445.75,0.633787,24273520.0,3488.5
max,0.825197,0.110551,0.915283,31732610.0,4560.0,28387730.0,0.898194,29956740.0,4305.0,0.901566,...,4388.0,0.932327,30690400.0,4411.0,0.942178,30902150.0,4441.0,0.957705,31235890.0,4489.0


In [284]:
monte_results = pd.read_csv("output_MCDLSTM-ali20_g-gpu-w288-h2.csv")
monte_results.drop(labels=["Unnamed: 0"], inplace=True, axis=1)
monte_results["model"] = ["MCD" for i in range(len(monte_results))]
monte_results.rename(columns={"avggpu": "pred_norm_gpu", "std": "pred_std", "labels": "true_norm_gpu"}, inplace=True)
first_column = monte_results.pop('model')
monte_results.insert(0, 'model', first_column)
monte_results["true_gpu"] = original_dataset["avggpu"].values[-2582:]
monte_results["true_n_gpu"] = monte_results.apply(lambda row: math.ceil(row["true_gpu"]/gpu_no_norm_conv_value), axis=1)
# monte_results.drop(columns=['true_norm_gpu'], inplace=True)
monte_results["pred_gpu"] = scaler.inverse_transform(monte_results["pred_norm_gpu"].values.reshape(-1,1))

for target in target_qos:
    alpa = float(int(target)/100)
    monte_results[f"ub_{target}_norm"] = monte_results.apply(lambda row: find_upper_bound(row["pred_norm_gpu"], row["pred_std"], alpha=alpa), axis=1)
    monte_results[f"ub_{target}"] = scaler.inverse_transform(monte_results[f"ub_{target}_norm"].values.reshape(-1,1))
    monte_results[f"pred_n_gpu_{target}"] = monte_results.apply(lambda row: math.ceil(row[f"ub_{target}"]/gpu_no_norm_conv_value), axis=1)
monte_results.to_csv("scenarios/monte_results.csv")
monte_results.describe()

Unnamed: 0,pred_norm_gpu,pred_std,true_norm_gpu,true_gpu,true_n_gpu,pred_gpu,ub_90_norm,ub_90,pred_n_gpu_90,ub_91_norm,...,pred_n_gpu_96,ub_97_norm,ub_97,pred_n_gpu_97,ub_98_norm,ub_98,pred_n_gpu_98,ub_99_norm,ub_99,pred_n_gpu_99
count,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,...,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0
mean,0.337572,6.597421e-08,0.328949,18227520.0,2619.806352,17906630.0,0.337572,17906640.0,2573.692874,0.337572,...,2573.692874,0.337572,17906640.0,2573.692874,0.337573,17906640.0,2573.693261,0.337573,17906640.0,2573.693261
std,0.214559,6.55825e-08,0.23252,5355664.0,769.605537,4611775.0,0.214559,4611776.0,662.714245,0.214559,...,662.714245,0.214559,4611777.0,662.714245,0.214559,4611777.0,662.713676,0.214559,4611777.0,662.713676
min,-0.039172,0.0,-0.172291,6682396.0,961.0,9808823.0,-0.039172,9808824.0,1410.0,-0.039172,...,1410.0,-0.039172,9808824.0,1410.0,-0.039172,9808824.0,1410.0,-0.039172,9808824.0,1410.0
25%,0.125868,1.117587e-08,0.106536,13104650.0,1884.0,13356230.0,0.125868,13356230.0,1919.5,0.125868,...,1919.5,0.125868,13356230.0,1919.5,0.125868,13356230.0,1919.5,0.125868,13356230.0,1919.5
50%,0.367512,5.960465e-08,0.36254,19001220.0,2731.0,18550160.0,0.367512,18550160.0,2666.0,0.367512,...,2666.0,0.367512,18550160.0,2666.0,0.367512,18550160.0,2666.0,0.367512,18550160.0,2666.0
75%,0.512203,8.940697e-08,0.51967,22620420.0,3251.0,21660170.0,0.512203,21660180.0,3112.75,0.512203,...,3112.75,0.512203,21660180.0,3112.75,0.512203,21660180.0,3112.75,0.512203,21660180.0,3112.75
max,0.893295,2.980232e-07,0.915283,31732610.0,4560.0,29851440.0,0.893295,29851450.0,4290.0,0.893295,...,4290.0,0.893295,29851450.0,4290.0,0.893295,29851450.0,4290.0,0.893295,29851450.0,4290.0


In [285]:
flbnn_results = pd.read_csv("output_FLBNN-ali20_g-gpu-w288-h2.csv")
flbnn_results.drop(labels=["Unnamed: 0"], inplace=True, axis=1)
flbnn_results["model"] = ["HBNN++" for i in range(len(flbnn_results))]
flbnn_results.rename(columns={"avggpu": "pred_norm_gpu", "std": "pred_std", "labels": "true_norm_gpu"}, inplace=True)
first_column = flbnn_results.pop('model')
flbnn_results.insert(0, 'model', first_column)
flbnn_results["true_gpu"] = original_dataset["avggpu"].values[-2582:]
flbnn_results["true_n_gpu"] = flbnn_results.apply(lambda row: math.ceil(row["true_gpu"]/gpu_no_norm_conv_value), axis=1)
# flbnn_results.drop(columns=['true_norm_gpu'], inplace=True)
flbnn_results["pred_gpu"] = scaler.inverse_transform(flbnn_results["pred_norm_gpu"].values.reshape(-1,1))

for target in target_qos:
    alpa = float(int(target)/100)
    flbnn_results[f"ub_{target}_norm"] = flbnn_results.apply(lambda row: find_upper_bound(row["pred_norm_gpu"], row["pred_std"], alpha=alpa), axis=1)
    flbnn_results[f"ub_{target}"] = scaler.inverse_transform(flbnn_results[f"ub_{target}_norm"].values.reshape(-1,1))
    flbnn_results[f"pred_n_gpu_{target}"] = flbnn_results.apply(lambda row: math.ceil(row[f"ub_{target}"]/gpu_no_norm_conv_value), axis=1)
flbnn_results.to_csv("scenarios/flbnn_results.csv")
flbnn_results.describe()

Unnamed: 0,pred_norm_gpu,pred_std,true_norm_gpu,true_gpu,true_n_gpu,pred_gpu,ub_90_norm,ub_90,pred_n_gpu_90,ub_91_norm,...,pred_n_gpu_96,ub_97_norm,ub_97,pred_n_gpu_97,ub_98_norm,ub_98,pred_n_gpu_98,ub_99_norm,ub_99,pred_n_gpu_99
count,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,...,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0
mean,0.362692,0.066016,0.328949,18227520.0,2619.806352,18446550.0,0.447295,20265030.0,2912.587529,0.451203,...,3008.252905,0.486855,21115340.0,3034.78079,0.498273,21360760.0,3070.043377,0.516269,21747570.0,3125.620837
std,0.160845,0.010062,0.23252,5355664.0,769.605537,3457235.0,0.172483,3707383.0,532.743167,0.173025,...,546.049709,0.177984,3825629.0,549.741435,0.179578,3859893.0,554.664675,0.182096,3914014.0,562.443335
min,0.12886,0.043463,-0.172291,6682396.0,961.0,13420520.0,0.198997,14928080.0,2146.0,0.202238,...,2225.0,0.231793,15633000.0,2247.0,0.241259,15836450.0,2276.0,0.256178,16157130.0,2322.0
25%,0.163772,0.05372,0.106536,13104650.0,1884.0,14170950.0,0.230023,15594940.0,2241.25,0.233086,...,2316.25,0.261072,16262310.0,2337.25,0.270133,16457070.0,2365.25,0.284297,16761520.0,2409.0
50%,0.404047,0.070484,0.36254,19001220.0,2731.0,19335460.0,0.49673,21327600.0,3065.0,0.500986,...,3169.5,0.539615,22249380.0,3198.0,0.551925,22513970.0,3236.0,0.57151,22934930.0,3296.5
75%,0.510996,0.074996,0.51967,22620420.0,3251.0,21634230.0,0.608714,23734600.0,3411.0,0.613219,...,3522.0,0.654395,24716480.0,3552.0,0.667527,24998740.0,3593.0,0.688476,25449020.0,3657.75
max,0.619214,0.090434,0.915283,31732610.0,4560.0,23960290.0,0.699979,25696260.0,3693.0,0.70371,...,3784.0,0.737744,26507990.0,3810.0,0.748643,26742270.0,3843.0,0.765823,27111530.0,3896.0


In [286]:
lstmq_results = pd.read_csv("LSTMQ/output_LSTMQ-0.95-ali20_g-gpu-w288-h2.csv")
lstmq_results.drop(labels=["Unnamed: 0"], inplace=True, axis=1)
lstmq_results["model"] = ["LSTMQ" for i in range(len(lstmq_results))]
lstmq_results.rename(columns={"avggpu": "pred_norm_gpu", "std": "pred_std", "labels": "true_norm_gpu"}, inplace=True)
first_column = lstmq_results.pop('model')
lstmq_results.insert(0, 'model', first_column)
lstmq_results["true_gpu"] = original_dataset["avggpu"].values[-2582:]
lstmq_results["true_n_gpu"] = lstmq_results.apply(lambda row: math.ceil(row["true_gpu"]/gpu_no_norm_conv_value), axis=1)
# lstmq_results.drop(columns=['true_norm_gpu'], inplace=True)

for target in target_qos:
    alpa = float(int(target)/100)
    target_results = pd.read_csv(f"LSTMQ/output_LSTMQ-{alpa}-ali20_g-gpu-w288-h2.csv")
    lstmq_results[f"ub_{target}_norm"] = target_results.apply(lambda row: find_upper_bound(row["avggpu"], row["std"], alpha=alpa), axis=1)
    lstmq_results[f"ub_{target}"] = scaler.inverse_transform(lstmq_results[f"ub_{target}_norm"].values.reshape(-1,1))
    lstmq_results[f"pred_n_gpu_{target}"] = lstmq_results.apply(lambda row: math.ceil(row[f"ub_{target}"]/gpu_no_norm_conv_value), axis=1)
lstmq_results.to_csv("scenarios/lstmq_results.csv")
lstmq_results.describe()

Unnamed: 0,pred_norm_gpu,pred_std,true_norm_gpu,true_gpu,true_n_gpu,ub_90_norm,ub_90,pred_n_gpu_90,ub_91_norm,ub_91,...,pred_n_gpu_96,ub_97_norm,ub_97,pred_n_gpu_97,ub_98_norm,ub_98,pred_n_gpu_98,ub_99_norm,ub_99,pred_n_gpu_99
count,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,...,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0,2582.0
mean,0.401227,0.0,0.328949,18227520.0,2619.806352,0.365301,18502640.0,2659.324555,0.379118,18799620.0,...,2810.230054,0.410383,19471650.0,2798.58598,0.405689,19370740.0,2784.068552,0.504672,21498300.0,3089.815647
std,0.227837,0.0,0.23252,5355664.0,769.605537,0.22771,4894433.0,703.334291,0.22985,4940434.0,...,678.332355,0.200764,4315268.0,620.102623,0.25381,5455447.0,783.955297,0.203512,4374329.0,628.588877
min,0.00582,0.0,-0.172291,6682396.0,961.0,-0.044874,9686262.0,1392.0,-0.011003,10414300.0,...,1711.0,0.065909,12067450.0,1735.0,-0.19306,6501125.0,935.0,0.136283,13580090.0,1952.0
25%,0.190937,0.0,0.106536,13104650.0,1884.0,0.147834,13828360.0,1988.0,0.155301,13988850.0,...,2146.5,0.223268,15449760.0,2220.25,0.240581,15821880.0,2274.25,0.317426,17473600.0,2511.5
50%,0.43173,0.0,0.36254,19001220.0,2731.0,0.398979,19226530.0,2763.0,0.411144,19488000.0,...,2888.0,0.428514,19861350.0,2854.5,0.454264,20414820.0,2934.0,0.525413,21944130.0,3154.0
75%,0.588165,0.0,0.51967,22620420.0,3251.0,0.551565,22506220.0,3234.75,0.568246,22864770.0,...,3370.5,0.574925,23008330.0,3307.0,0.599153,23529090.0,3382.0,0.663141,24904470.0,3579.0
max,0.982749,0.0,0.915283,31732610.0,4560.0,0.910586,30223110.0,4344.0,0.961678,31321280.0,...,4444.0,0.927526,30587210.0,4396.0,0.991726,31967130.0,4594.0,1.035353,32904860.0,4729.0


In [287]:
perc_upper_bound = 0.05
lstm_results = pd.read_csv("output_LSTM-ali20_g-gpu-w288-h2.csv")
lstm_results.drop(labels=["Unnamed: 0"], inplace=True, axis=1)
lstm_results["model"] = ["LSTMQ" for i in range(len(lstm_results))]
lstm_results.rename(columns={"avggpu": "pred_norm_gpu", "std": "pred_std", "labels": "true_norm_gpu"}, inplace=True)
first_column = lstm_results.pop('model')
lstm_results.insert(0, 'model', first_column)
lstm_results["true_gpu"] = original_dataset["avggpu"].values[-2582:]
lstm_results["true_n_gpu"] = lstm_results.apply(lambda row: math.ceil(row["true_gpu"]/gpu_no_norm_conv_value), axis=1)
# lstm_results.drop(columns=['true_norm_gpu'], inplace=True)
lstm_results["pred_gpu"] = scaler.inverse_transform(lstm_results["pred_norm_gpu"].values.reshape(-1,1))

for target in target_qos:
    alpa = float(int(target)/100)
    lstm_results[f"ub_{target}_norm"] = lstm_results["pred_norm_gpu"].values
    lstm_results[f"ub_{target}_norm"] = [value+(perc_upper_bound*value) for value in lstm_results[f"ub_{target}_norm"].values]
    lstm_results[f"ub_{target}"] = scaler.inverse_transform(lstm_results[f"ub_{target}_norm"].values.reshape(-1,1))
    lstm_results[f"pred_n_gpu_{target}"] = lstm_results.apply(lambda row: math.ceil(row[f"ub_{target}"]/gpu_no_norm_conv_value), axis=1)
lstm_results.to_csv("scenarios/lstm_results.csv")
lstm_results.head(2)

Unnamed: 0,model,pred_norm_gpu,true_norm_gpu,true_gpu,true_n_gpu,pred_gpu,ub_90_norm,ub_90,pred_n_gpu_90,ub_91_norm,...,pred_n_gpu_96,ub_97_norm,ub_97,pred_n_gpu_97,ub_98_norm,ub_98,pred_n_gpu_98,ub_99_norm,ub_99,pred_n_gpu_99
0,LSTMQ,0.835576,0.804885,29189800.0,4195,28610810.0,0.877354,29508810.0,4241,0.877354,...,4241,0.877354,29508810.0,4241,0.877354,29508810.0,4241,0.877354,29508810.0,4241
1,LSTMQ,0.822173,0.81436,29408050.0,4226,28322730.0,0.863281,29206330.0,4197,0.863281,...,4197,0.863281,29206330.0,4197,0.863281,29206330.0,4197,0.863281,29206330.0,4197


<h2>Accuracy</h2>

In [248]:
def calculate_accuracy(df_results, tg):
    mse_value = mean_squared_error(df_results[f"ub_{tg}_norm"].values, df_results["true_norm_gpu"].values)
    mae_value = mean_absolute_error(df_results[f"ub_{tg}_norm"].values, df_results["true_norm_gpu"].values)
    return mse_value, mae_value

def calculate_success(df_results, tg):
    sr_value = sum(df_results.ub_95>df_results.true_gpu)/df_results.shape[0]
    up_value = sum((df_results.ub_95<df_results.true_gpu)*(df_results.true_gpu-df_results.ub_95))/sum(df_results.true_gpu)
    op_value = sum((df_results.ub_95>df_results.true_gpu)*(df_results.ub_95-df_results.true_gpu))/sum(df_results.true_gpu)
    return sr_value, up_value, op_value

In [249]:
models = {"MCDLSTM": monte_results,
          "HBNN": hbnn_results,
          "FLBNN": flbnn_results,
          "LSTMQ": lstmq_results,
          "LSTM": lstm_results
          }

In [250]:
mse_column = []
mae_column = []
model_column = []
target_column = []
for model in models:
    print(model)
    for target in target_qos:
        if model == "LSTMQ":
            a, b = "NA", "NA"
        else:
            a, b = calculate_accuracy(models[model], target)
        mse_column.append(a)
        mae_column.append(b)
        model_column.append(model)
        target_column.append(target)
d = {"model": model_column,
     "qos": target_column,
     "mse": mse_column,
     "mae": mae_column
     }
df_accuracy = pd.DataFrame(data=d)

MCDLSTM
HBNN
FLBNN
LSTMQ
LSTM


In [251]:
df_accuracy.head(1)

Unnamed: 0,model,qos,mse,mae
0,MCDLSTM,90,0.002068,0.033906


In [252]:
sr_column = []
up_column = []
op_column = []
model_column = []
target_column = []
for model in models:
    print(model)
    for target in target_qos:
        a, b, c = calculate_success(models[model], target)
        sr_column.append(a)
        up_column.append(b)
        op_column.append(c)
        model_column.append(model)
        target_column.append(target)
d = {"model": model_column,
     "qos": target_column,
     "SR": sr_column,
     "UP": up_column,
     "OP": op_column
     }
df_success = pd.DataFrame(data=d)

MCDLSTM
HBNN
FLBNN
LSTMQ
LSTM


In [253]:
# sr_value = sum(df_results.ub_95>df_results.true_gpu)/df_results.shape[0]
# print("SR", sr_value)

# up_value = sum((df_results.ub_95<df_results.true_norm_gpu)*(df_results.true_norm_gpu-df_results.ub_95))/sum((df_results.ub_95<df_results.true_norm_gpu)*df_results.true_norm_gpu)
# up_value = sum((df_results.ub_95<df_results.true_norm_gpu)*(df_results.true_norm_gpu-df_results.ub_95))/sum(df_results.ub_95<df_results.true_norm_gpu)

# up_value = sum((df_results.ub_95<df_results.true_norm_gpu)*((df_results.true_norm_gpu-df_results.ub_95)))/sum(df_results.true_norm_gpu)
# print("UP", up_value)

# op_value = sum((df_results.ub_95>df_results.true_norm_gpu)*(df_results.ub_95-df_results.true_norm_gpu))/sum((df_results.ub_95>df_results.true_norm_gpu)*df_results.true_norm_gpu)

# op_value = sum((df_results.ub_95>df_results.true_norm_gpu)*(df_results.ub_95-df_results.true_norm_gpu))/sum(df_results.ub_95>df_results.true_norm_gpu)

# op_value = sum((df_results.ub_95>df_results.true_norm_gpu)*((df_results.ub_95-df_results.true_norm_gpu)))/sum(df_results.true_norm_gpu)
# print("OP", op_value)

In [254]:
df_accuracy.to_csv("accuracy_table.csv")
df_success.to_csv("success_table.csv")

In [255]:
hbnn_results.columns

Index(['model', 'pred_norm_gpu', 'pred_std', 'true_norm_gpu', 'true_gpu',
       'true_n_gpu', 'pred_gpu', 'ub_90_norm', 'ub_90', 'pred_n_gpu_90',
       'ub_91_norm', 'ub_91', 'pred_n_gpu_91', 'ub_92_norm', 'ub_92',
       'pred_n_gpu_92', 'ub_93_norm', 'ub_93', 'pred_n_gpu_93', 'ub_94_norm',
       'ub_94', 'pred_n_gpu_94', 'ub_95_norm', 'ub_95', 'pred_n_gpu_95',
       'ub_96_norm', 'ub_96', 'pred_n_gpu_96', 'ub_97_norm', 'ub_97',
       'pred_n_gpu_97', 'ub_98_norm', 'ub_98', 'pred_n_gpu_98', 'ub_99_norm',
       'ub_99', 'pred_n_gpu_99'],
      dtype='object')

<h2>Baselines</h2>

a. Running exactly the required number of GPUs that would be specified by our schedulers acting as an oracle. Will have minimal energy use, and a 100% success rate.

b. Always running the maximum number of GPUs acting as a dummy predictor. Will have maximal energy use, and a 100% success rate.

c. Always running the GPUs that were specified by the oracle for the previous time window.

In [34]:
# define the costs (expressed in Watt)
unit_cost = 250
cost_on_5_mins = unit_cost/12
turn_on_cost = 0.20*cost_on_5_mins

n_total_gpus = 6742 # total number of GPUs

<h2>Energy scenario 1</h2>

All GPU machines have the same computational power and they consume the same amount of energy.

The scenario has no memory (no GPU state transitions), i.e. the energy consumption is calculated independently from the previous GPUs states

In [259]:
energy_baseline_a = hbnn_results["true_n_gpu"].values.sum()*cost_on_5_mins*turn_on_cost
energy_baseline_b = n_total_gpus*len(hbnn_results)*cost_on_5_mins*turn_on_cost
baseline_c_values = list(hbnn_results["true_n_gpu"].values[:-1])
baseline_c_values.insert(baseline_c_values[0], 0) # because I do not know the first value
energy_baseline_c = np.sum(baseline_c_values)*cost_on_5_mins*turn_on_cost

In [261]:
models = {"MCD": monte_results,
          "HBNN": hbnn_results,
          "HBNN++": flbnn_results,
          "LSTMQ": lstmq_results,
          # "LSTM": lstm_results
          }
tot_energy_column = []
target_column = []
savings_column = []
model_column = []
for target in target_qos:
    target_column.append(target)
    tot_energy_column.append(energy_baseline_a)
    model_column.append("baseline_a")
    savings_column.append(100 - round(energy_baseline_a/energy_baseline_b*100, 2))

    target_column.append(target)
    tot_energy_column.append(energy_baseline_b)
    model_column.append("baseline_b")
    savings_column.append(100 - round(energy_baseline_b/energy_baseline_b*100, 2))

    target_column.append(target)
    tot_energy_column.append(energy_baseline_c)
    model_column.append("baseline_c")
    savings_column.append(100 - round(energy_baseline_c/energy_baseline_b*100, 2))

    for model in models:
        target_column.append(target)
        model_column.append(model)

        en_value = models[model][f"pred_n_gpu_{target}"].values.sum()*cost_on_5_mins*turn_on_cost
        tot_energy_column.append(en_value)
        savings_column.append(100 - round(en_value/energy_baseline_b*100, 2))

d = {"qos": target_column,
     "model": model_column,
     "tot_energy": tot_energy_column,
     "% energy savings": savings_column
     }
df_scenario_1 = pd.DataFrame(data=d)
df_scenario_1.to_csv("scenario_1.csv")

<h2>Energy scenario 3</h2>

All GPU machines have the same computational power and they consume the same amount of energy when ON.
We model also the state of a GPU {ON, OFF}, and there is a fixed cost on transitioning from OFF to ON. No cost from ON to OFF.

Initial scenario: All GPUs needed are ON, and there is no associated cost for this. Then, GPUs are switched ON and OFF based on the predicted workload. The energy consumtpion is calculated accordingly.

In [262]:
def calculate_consumption(n_gpu_predicted, history, fixed_cost_run, fixed_cost_switch):
    if n_gpu_predicted > history:
        transition_cost = (n_gpu_predicted - history)*fixed_cost_switch
    else:
        transition_cost = 0
    return transition_cost + n_gpu_predicted*fixed_cost_run

In [263]:
models = {"MCD": monte_results,
          "HBNN": hbnn_results,
          "HBNN++": flbnn_results,
          "LSTMQ": lstmq_results,
          # "LSTM": lstm_results
          }

In [264]:
total_cost = 0
gpu_history = hbnn_results["true_n_gpu"].values
for i, n_gpu in enumerate(gpu_history):
    if i == 0:
        total_cost += n_gpu
        continue
    total_cost += calculate_consumption(n_gpu, gpu_history[i-1], cost_on_5_mins, turn_on_cost)
energy_baseline_a = total_cost
energy_baseline_b = n_total_gpus*len(hbnn_results)*cost_on_5_mins + turn_on_cost*n_total_gpus

In [265]:
# TODO baseline c

In [266]:
tot_energy_column = []
target_column = []
savings_column = []
model_column = []
for target in target_qos:
    target_column.append(target)
    tot_energy_column.append(energy_baseline_a)
    model_column.append("baseline_a")
    savings_column.append(100 - round(energy_baseline_a/energy_baseline_b*100, 2))

    target_column.append(target)
    tot_energy_column.append(energy_baseline_b)
    model_column.append("baseline_b")
    savings_column.append(100 - round(energy_baseline_b/energy_baseline_b*100, 2))

    for model in models:
        target_column.append(target)
        model_column.append(model)

        total_cost = 0
        gpu_history = models[model][f"pred_n_gpu_{target}"].values
        for i, n_gpu in enumerate(gpu_history):
            if i == 0:
                total_cost += n_gpu*cost_on_5_mins
                continue
            total_cost += calculate_consumption(n_gpu, gpu_history[i-1], cost_on_5_mins, turn_on_cost)
        tot_energy_column.append(total_cost)
        savings_column.append(100 - round(total_cost/energy_baseline_b*100, 2))

d = {"qos": target_column,
     "model": model_column,
     "tot_energy": tot_energy_column,
     "% energy savings": savings_column
     }
df_scenario_3 = pd.DataFrame(data=d)
df_scenario_3.to_csv("scenario_3.csv")

<h2>Energy scenario 2</h2>

All GPUs are of different characteristics. At the beginning of each time window, all GPUs are OFF.

In [274]:
gpus_specs = {"P100": (1596, float(scaler.inverse_transform(np.array(0.000404575892857).reshape(-1,1))), 0.0208),
              "T4": (994, float(scaler.inverse_transform(np.array(0.00056640625).reshape(-1,1))), 0.0058),
              "V100": (1912, float(scaler.inverse_transform(np.array(0.000283203125).reshape(-1,1))), 0.0233),
              "MISC": (2240, float(scaler.inverse_transform(np.array(0.000361596009975).reshape(-1,1))), 0.0208),
              }
for spec in gpus_specs:
    print(f"{spec}: {gpus_specs[spec]}")

P100: (1596, 10659485.301714368, 0.0208)
T4: (994, 10662963.714422604, 0.0058)
V100: (1912, 10656876.492183197, 0.0233)
MISC: (2240, 10658561.4838754, 0.0208)


In [278]:
float(scaler.inverse_transform(np.array(1.45).reshape(-1,1)))

41817367.13571128

In [119]:
def allocate_resources(w_demand: float,
                       df_stat: pd.DataFrame
                       ) -> float:
    df_allocation = df_stat.copy()
    wd_rem = w_demand
    cost = 0
    while wd_rem > 0:
        # if a single GPU can satisfy the remaining work, we choose the cheapest one.
        if wd_rem < max(df_allocation.w):
            df_allocation = df_allocation[df_allocation["w"] > wd_rem]
            df_allocation.sort_values(by='cost', inplace=True)
            cost += df_allocation.cost.head(1).values[0]
            break

        # Otherwise we select the most efficient
        wd_rem -= df_allocation.w.tail(1).values[0]
        cost += df_allocation.cost.tail(1).values[0]
        # remove the GPU used
        df_allocation.drop(df_allocation.tail(1).index,inplace=True) # drop last n rows
    return cost

In [141]:
# build dataframe of GPUs
w = [] # stores workload that GPUs can provide
e = [] # stores energy consumption
c = [] # stores energy cost to turn on GPUs
s = [] # stores the status of GPUs
g_name = [] # stores the name of the GPUs
for gpu_name in gpus_specs:
    w_value = gpus_specs[gpu_name][1]
    n_g = gpus_specs[gpu_name][0]
    e_g = gpus_specs[gpu_name][2]
    c_g = gpus_specs[gpu_name][2]*0.2
    g_name.extend([gpu_name for i in range(n_g)])
    w.extend([w_value for i in range(n_g)])
    e.extend([e_g for i in range(n_g)])
    c.extend([c_g for i in range(n_g)])
    s.extend([1 for i in range(n_g)])

d = {"gpu_name": g_name, 'w': w, 'e': e, 'c': c, 's': s}
df_gpus_status = pd.DataFrame(data=d)

# efficiency of GPU = workload provided / energy required
df_gpus_status['cost'] = df_gpus_status.e + df_gpus_status.s * df_gpus_status.c
df_gpus_status['eff'] = df_gpus_status.w/ df_gpus_status.cost

df_gpus_status = df_gpus_status.sort_values(by='eff')
print(df_gpus_status.head(2))

     gpu_name         w       e         c  s      cost       eff
3370     V100  0.000283  0.0233  0.005825  1  0.029125  0.009724
2868     V100  0.000283  0.0233  0.005825  1  0.029125  0.009724


In [142]:
# pred_workloads = {"baseline_a": list(hbnn_results["true_norm_gpu"].values),
#                   "HBNN": list(hbnn_results["ub_95"].values),
#                   }

pred_workloads = {"baseline_a": hbnn_results["true_norm_gpu"].values[:50],
                  "baseline_b": [1.45 for i in range(len(hbnn_results))], # max possible workload
                  "HBNN": hbnn_results["ub_95"].values[:50],
                  "MCD": monte_results["ub_95"].values[:50],
                  "HBNN++": flbnn_results["ub_95"].values[:50],
                  "LSTMQ": lstmq_results["ub_95"].values[:50],
                  "LSTM": lstm_results["ub_95"].values[:50],
                  }
print(f"{datetime.datetime.now()} -- BEGIN!")
scenario_2_costs = {}
for model in pred_workloads:
    demands = pred_workloads[model]
    tot_costs = [allocate_resources(demand, df_gpus_status) for demand in demands]
    scenario_2_costs[model] = np.sum(tot_costs)
    print(f"{datetime.datetime.now()} -- Done with model {model}!")
print(f"{datetime.datetime.now()} -- END!")

2023-06-23 17:26:25.239993 -- BEGIN!
2023-06-23 17:27:57.800369 -- Done with model baseline_a!


KeyboardInterrupt: 

In [None]:
model_column = [model for model in scenario_2_costs]
energy_value = [100 - round(scenario_2_costs[model]/scenario_2_costs["baseline_b"]*100, 2) for model in scenario_2_costs]
d = {"model": model_column, "% energy savings": energy_value}
scenario_2_costs = pd.DataFrame(data=d)
scenario_2_costs

In [123]:
scenario_2_costs

{'baseline_a': 2317.0266000000797}

In [61]:
hbnn_results.head(1)

Unnamed: 0,model,pred_gpu,pred_std,true_norm_gpu,true_gpu,true_n_gpu,ub_95,pred_n_gpu_95
0,HBNN,0.79237,0.058169,0.804885,29189800.0,3718,0.88805,4103
