In [None]:
import sys
import os

# Make the sibling ``utils`` directory importable so ``format_time`` can be loaded.
# The path is built with os.path.join (portable across operating systems) and the
# membership test checks the exact entry being appended, so re-running the cell
# never adds duplicates and never skips the append by mistake.
module_path = os.path.abspath(os.path.join('..'))
utils_path = os.path.join(module_path, 'utils')
if utils_path not in sys.path:
    sys.path.append(utils_path)

import format_time as ft
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json

# Experiment configuration. EPOCHS is used further down to scale the mean per-epoch
# duration up to a whole run; BATCH_SIZE and SAMPLES are not referenced in the visible
# cells (presumably they mirror the training setup — confirm).
EPOCHS = 20
BATCH_SIZE = 128
SAMPLES = 50000


def _load_epoch_results(csv_path):
    """Read a per-epoch results CSV and convert its timing columns.

    Both the ``duration(s)`` and ``step(ms)`` columns are passed element-wise
    through ``ft.plot_time``; all other columns are returned as read.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the two timing columns converted.
    """
    results = pd.read_csv(csv_path)
    for column in ('duration(s)', 'step(ms)'):
        results[column] = results[column].apply(ft.plot_time)
    return results


# Training-phase results for the three measurement setups.
df_stats_train = _load_epoch_results(r'../results/mlresults_stats_train.csv')
df_eco2ai_train = _load_epoch_results(r'../results/mlresults_eco2ai_nostats_train.csv')
df_codecarbon_train = _load_epoch_results(r'../results/mlresults_codecarbon_nostats_train.csv')

# Test-phase results for the same three setups.
df_stats_test = _load_epoch_results(r'../results/mlresults_stats_test.csv')
df_eco2ai_test = _load_epoch_results(r'../results/mlresults_eco2ai_nostats_test.csv')
df_codecarbon_test = _load_epoch_results(r'../results/mlresults_codecarbon_nostats_test.csv')

# CodeCarbon and Eco2AI each write all of their records to a single CSV.
# Even-positioned rows are used as training records and odd-positioned rows as test
# records (presumably each experiment logs a training row followed by a test row —
# confirm against the logging code).
# The rows are sliced with ``iloc`` so every column keeps its dtype; rebuilding the
# frame from ``.values`` would turn all columns into ``object``.
df_codecarbon = pd.read_csv(r'../results/emissions.csv')
df_codecarbon_results_train = df_codecarbon.iloc[::2].reset_index(drop=True)
df_codecarbon_results_test = df_codecarbon.iloc[1::2].reset_index(drop=True)

df_eco2ai = pd.read_csv(r'../results/resultsEco2AI.csv')
df_eco2ai_results_train = df_eco2ai.iloc[::2].reset_index(drop=True)
df_eco2ai_results_test = df_eco2ai.iloc[1::2].reset_index(drop=True)

# Load the raw hardware statistics recorded during training and testing.
# Only the 'exp_0' entry of each JSON document is kept.
with open('../results/stats_train.json') as train_file:
    stats_train = json.load(train_file)['exp_0']

with open('../results/stats_test.json') as test_file:
    stats_test = json.load(test_file)['exp_0']

def add_label(x, y, val, ax, distance=0, other=None):
    """Write a text label next to every (x, y) point on ``ax``.

    The three series are combined column-wise into one frame and walked row by
    row; each row produces one ``ax.text`` call.

    Parameters
    ----------
    x, y : pandas.Series
        Coordinates of the points to annotate.
    val : pandas.Series
        Value shown as the label text (converted with ``str``).
    ax : object
        Target axes; only its ``text(x, y, s)`` method is used.
    distance : number, optional
        Horizontal offset added to ``x`` before placing the text.
    other : object, optional
        When given, ``str(other)`` is appended to every label.
    """
    suffix = '' if other is None else str(other)
    points = pd.concat({'x': x, 'y': y, 'val': val}, axis=1)
    for _, row in points.iterrows():
        ax.text(row['x'] + distance, row['y'], str(row['val']) + suffix)

In [None]:
# Bar chart of the highest accuracy value found for each model in the training results.
sns.set_theme(rc={'figure.figsize': (11.7, 8.27)}, style="whitegrid")
accuracy_ax = sns.barplot(
    data=df_stats_train,
    x="project_name",
    y="accuracy",
    estimator=np.max,
    errorbar=None,
)
accuracy_ax.set_title('Model Max Accuracy')
accuracy_ax.set_xlabel("Models")
accuracy_ax.set_ylabel("Accuracy (%)")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Compare the training runtimes recorded under the three measurement setups,
# first for the 'duration(s)' column and then for the 'step(ms)' column.
#
# The columns needed for each plot are selected explicitly instead of dropping a
# fixed list of unwanted ones. Other cells add columns to df_stats_train in place;
# with the drop-list approach those extra columns would end up in the melted data
# whenever this cell is re-run afterwards.
sns.set_theme(style="whitegrid", font_scale=2.5)

join_keys = ['project_name', 'epoch']
for metric, plot_title in (('duration(s)', 'Runtime (training)'),
                           ('step(ms)', 'Runtime per batch (training)')):
    # One column per tool, aligned on (project_name, epoch) with an inner join.
    merged_duration_df = (
        df_stats_train[join_keys + [metric]].rename(columns={metric: "TOSH_stats"})
        .merge(df_eco2ai_train[join_keys + [metric]].rename(columns={metric: "Eco2AI"}), on=join_keys)
        .merge(df_codecarbon_train[join_keys + [metric]].rename(columns={metric: "CodeCarbon"}), on=join_keys)
        .drop(columns=['epoch'])
    )

    # Bar height is seaborn's default estimator; the error bar spans min..max over the epochs.
    sns.catplot(x="project_name", y="value", hue="variable", kind="bar", errorbar=lambda x: (x.min(), x.max()), data=pd.melt(merged_duration_df, id_vars=['project_name']), height=16, aspect=2/1, legend_out=False)
    plt.xticks(rotation=45)
    plt.title(plot_title)
    plt.xlabel("Models")
    plt.ylabel("Milliseconds (ms)")
    plt.legend(loc='upper right', title='Experiment')
    plt.show()

In [None]:
# Compare the test-phase runtimes recorded under the three measurement setups,
# first for the 'duration(s)' column and then for the 'step(ms)' column.
#
# The columns needed for each plot are selected explicitly instead of dropping a
# fixed list of unwanted ones. Other cells add columns to df_stats_test in place;
# with the drop-list approach those extra columns would end up in the melted data
# whenever this cell is re-run afterwards.
sns.set_theme(style="whitegrid", font_scale=2.5)

join_keys = ['project_name', 'epoch']
for metric, plot_title in (('duration(s)', 'Runtime (test)'),
                           ('step(ms)', 'Runtime per batch (testing)')):
    # One column per tool, aligned on (project_name, epoch) with an inner join.
    merged_duration_df = (
        df_stats_test[join_keys + [metric]].rename(columns={metric: "TOSH_stats"})
        .merge(df_eco2ai_test[join_keys + [metric]].rename(columns={metric: "Eco2AI"}), on=join_keys)
        .merge(df_codecarbon_test[join_keys + [metric]].rename(columns={metric: "CodeCarbon"}), on=join_keys)
        .drop(columns=['epoch'])
    )

    # Bar height is seaborn's default estimator; the error bar spans min..max over the epochs.
    sns.catplot(x="project_name", y="value", hue="variable", kind="bar", errorbar=lambda x: (x.min(), x.max()), data=pd.melt(merged_duration_df, id_vars=['project_name']), height=16, aspect=2/1, legend_out=False)
    plt.xticks(rotation=45)
    plt.title(plot_title)
    plt.xlabel("Models")
    plt.ylabel("Milliseconds (ms)")
    plt.legend(loc='upper right', title='Experiment')
    plt.show()

In [None]:
# Per-epoch metrics stored as lists of samples; each is reduced to its mean.
# The order matches the order in which the columns are created on the frame.
_MEAN_STAT_COLUMNS = (
    'cpu_energy_j', 'cpu_delta_power_w', 'cpu_percent', 'cpu_memory_percent',
    'cpu_temperature_c', 'gpu_power_w', 'gpu_temperature_c',
    'gpu_memory_free_b', 'gpu_memory_used_b', 'gpu_percent',
)


def _attach_epoch_stats(results, stats):
    """Copy per-epoch hardware statistics from ``stats`` onto ``results``.

    For every model named in ``results['project_name']`` and every epoch listed
    for that model in ``stats``, the rows matching (model, epoch) receive the
    epoch's ``start_time`` and ``stop_time`` plus the mean of each sampled
    metric in ``_MEAN_STAT_COLUMNS``. Negative ``cpu_delta_power_w`` samples
    are discarded before averaging.

    Parameters
    ----------
    results : pandas.DataFrame
        Frame with ``project_name`` and ``epoch`` columns. Modified in place:
        the statistic columns are added or overwritten.
    stats : dict
        Mapping ``model -> epoch (string key) -> metric name -> value(s)``.
        Left untouched; a plain ``dict.copy()`` would not be enough for that,
        because the nested per-epoch dictionaries would still be shared.

    Returns
    -------
    dict
        A copy of ``stats`` in which the processed models carry the filtered
        ``cpu_delta_power_w`` lists.

    Raises
    ------
    KeyError
        If a model in ``results`` is missing from ``stats``.
    """
    cleaned = dict(stats)
    for model in results['project_name'].unique():
        cleaned[model] = {}
        for epoch, samples in stats[model].items():
            # Work on a per-epoch copy so the loaded JSON keeps its raw samples.
            epoch_stats = dict(samples)
            epoch_stats['cpu_delta_power_w'] = [w for w in samples['cpu_delta_power_w'] if w >= 0]
            cleaned[model][epoch] = epoch_stats

            # The row mask does not depend on the columns written below, so build it once.
            rows = (results['project_name'] == model) & (results['epoch'] == int(epoch))
            results.loc[rows, 'start_time'] = epoch_stats['start_time']
            results.loc[rows, 'stop_time'] = epoch_stats['stop_time']
            for column in _MEAN_STAT_COLUMNS:
                # np.mean of an empty list is NaN (e.g. if every delta sample was negative).
                results.loc[rows, column] = np.mean(epoch_stats[column])
    return cleaned


stats_train_tmp = _attach_epoch_stats(df_stats_train, stats_train)
stats_test_tmp = _attach_epoch_stats(df_stats_test, stats_test)

# Kept for compatibility: the original cell left `models` bound to the test model names.
models = df_stats_test.project_name.unique()

In [None]:
# Average CPU power per model, for the training and the test phase.
# Each row of the frame already holds the mean CPU delta power of one epoch; the bar
# shows the mean over epochs, matching the "Average" wording of the title and axis
# label and the estimator used for the GPU power plots. The previous estimator was
# np.max, which plotted the highest per-epoch average instead.
# NOTE(review): if the peak epoch was intended, restore np.max and reword the labels.
for phase_df, plot_title in ((df_stats_train, 'Average CPU power consumption (training)'),
                             (df_stats_test, 'Average CPU power consumption (testing)')):
    sns.set(rc={'figure.figsize':(11.7,8.27)}, style="whitegrid")
    sns.barplot(x="project_name", y="cpu_delta_power_w", data=phase_df, estimator=np.mean, errorbar=None)
    plt.xticks(rotation=45)
    plt.title(plot_title)
    plt.xlabel("Models")
    plt.ylabel("Average CPU power (W)")
    plt.show()

In [None]:
# Average GPU power per model, drawn once for the training frame and once for the
# test frame. The estimator divides the mean by 1000 before plotting it as watts
# (presumably gpu_power_w is logged in milliwatts — confirm).
gpu_power_plots = (
    (df_stats_train, 'Average GPU power consumption (training)'),
    (df_stats_test, 'Average GPU power consumption (testing)'),
)
for phase_df, plot_title in gpu_power_plots:
    sns.set_theme(rc={'figure.figsize': (11.7, 8.27)}, style="whitegrid")
    gpu_ax = sns.barplot(
        data=phase_df,
        x="project_name",
        y="gpu_power_w",
        estimator=lambda x: np.mean(x)/1000,
    )
    gpu_ax.set_title(plot_title)
    gpu_ax.set_xlabel("Models")
    gpu_ax.set_ylabel("Average GPU power (W)")
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Per-model averages over all training epochs.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where a
# non-numeric column makes mean() raise instead of being silently dropped.
df_stats_train_tmp = df_stats_train.groupby('project_name').mean(numeric_only=True).reset_index()
# Divide by 1000 to plot watts (presumably the raw values are milliwatts — confirm),
# rounded to whole watts.
df_stats_train_tmp['gpu_power_w'] = round(df_stats_train_tmp['gpu_power_w']/1000)

sns.set(style="whitegrid")
sns.scatterplot(x="gpu_power_w", y="gpu_percent", hue="project_name", data=df_stats_train_tmp, style='project_name', s=150)
plt.title('Average GPU power consumption (training)')
plt.xlabel("Average GPU power (W)")
plt.ylabel("Utilisation (%)")
plt.legend(loc='upper left', title='Experiment')
plt.show()

In [None]:
# Per-model averages over all training epochs.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where a
# non-numeric column makes mean() raise instead of being silently dropped.
df_stats_train_tmp = df_stats_train.groupby('project_name').mean(numeric_only=True).reset_index()
# CPU power is rounded to one decimal place for plotting.
df_stats_train_tmp['cpu_delta_power_w'] = round(df_stats_train_tmp['cpu_delta_power_w'],1)

sns.set(style="whitegrid")
sns.scatterplot(x="cpu_delta_power_w", y="cpu_percent", hue="project_name", data=df_stats_train_tmp, style='project_name', s=150)
plt.title('Average CPU power consumption (training)')
plt.xlabel("Average CPU power (W)")
plt.ylabel("Utilisation (%)")
plt.legend(loc='upper left', title='Experiment')
plt.show()

In [None]:
# Per-model averages over the test-phase records.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where a
# non-numeric column makes mean() raise instead of being silently dropped.
df_stats_test_tmp = df_stats_test.groupby('project_name').mean(numeric_only=True).reset_index()
# Divide by 1000 to plot watts (presumably the raw values are milliwatts — confirm),
# rounded to one decimal place.
df_stats_test_tmp['gpu_power_w'] = round(df_stats_test_tmp['gpu_power_w']/1000, 1)

sns.set(style="whitegrid")
sns.scatterplot(x="gpu_power_w", y="gpu_percent", hue="project_name", data=df_stats_test_tmp, style='project_name', s=150)
# The data comes from df_stats_test, so the title names the testing phase.
plt.title('Average GPU power consumption (testing)')
plt.xlabel("Average GPU power (W)")
plt.ylabel("Utilisation (%)")
plt.legend(loc='upper left', title='Experiment')
plt.show()

In [None]:
# Per-model averages over the test-phase records.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where a
# non-numeric column makes mean() raise instead of being silently dropped.
df_stats_test_tmp = df_stats_test.groupby('project_name').mean(numeric_only=True).reset_index()
# CPU power is rounded to one decimal place for plotting.
df_stats_test_tmp['cpu_delta_power_w'] = round(df_stats_test_tmp['cpu_delta_power_w'],1)

sns.set(style="whitegrid")
sns.scatterplot(x="cpu_delta_power_w", y="cpu_percent", hue="project_name", data=df_stats_test_tmp, style='project_name', s=150)
# The data comes from df_stats_test, so the title names the testing phase.
plt.title('Average CPU power consumption (testing)')
plt.xlabel("Average CPU power (W)")
plt.ylabel("Utilisation (%)")
plt.legend(loc='upper left', title='Experiment')
plt.show()

In [None]:
# Per-model averages over all training epochs.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where a
# non-numeric column makes mean() raise instead of being silently dropped.
df_stats_train_tmp = df_stats_train.groupby('project_name').mean(numeric_only=True).reset_index()
# Energy estimate in Wh: (mean CPU delta power + mean GPU power / 1000 + 47)
# multiplied by the run time in hours (mean epoch duration / 1000 * EPOCHS / 3600).
# NOTE(review): 47 is an unexplained constant added to the power term — presumably a
# fixed baseline draw in watts; confirm and move it to a named constant.
df_stats_train_tmp['total'] = (df_stats_train_tmp['cpu_delta_power_w'] + (df_stats_train_tmp['gpu_power_w']/1000) + 47) * (df_stats_train_tmp['duration(s)']/1000*EPOCHS / 3600)

sns.set(style="whitegrid")
sns.scatterplot(x="total", y="accuracy", hue="project_name", data=df_stats_train_tmp, style='project_name', s=150)
plt.title('Total power consumption as a function of the accuracy (training)')
plt.xlabel("Total power consumption (Wh)")
plt.ylabel("Accuracy (%)")
plt.legend(loc='lower right', title='Experiment')
# Write each model's name next to its point, shifted by 1 along the x axis.
add_label(df_stats_train_tmp.total, df_stats_train_tmp.accuracy, df_stats_train_tmp.project_name, plt.gca(), 1)
plt.show()

In [None]:
# Per-model averages over all training epochs.
# numeric_only=True keeps the aggregation working on pandas >= 2.0, where a
# non-numeric column makes mean() raise instead of being silently dropped.
df_stats_train_tmp = df_stats_train.groupby('project_name').mean(numeric_only=True).reset_index()
# GPU power divided by 1000 and rounded to whole watts; used GPU memory converted
# from bytes to units of 10**9 bytes (labelled 'GB' below) with two decimals.
df_stats_train_tmp['gpu_power_w'] = round(df_stats_train_tmp['gpu_power_w']/1000)
df_stats_train_tmp['gpu_memory_used_b'] = round(df_stats_train_tmp['gpu_memory_used_b']/pow(10, 9), 2)

sns.set(style="whitegrid")
ax = sns.scatterplot(x="gpu_power_w", y="gpu_percent", hue="project_name", data=df_stats_train_tmp, style='project_name', size='gpu_memory_used_b', sizes=(50, 500))
h,l = ax.get_legend_handles_labels()
# Keep legend entries 1..13 only.
# NOTE(review): presumably this skips the leading legend title entry and the size
# entries; the bound 14 is hard-coded and depends on the number of models — confirm.
plt.legend(h[1:14],l[1:14], loc="upper left", bbox_to_anchor=(1.02, 0.75), title='Experiment')
plt.title('Average GPU power consumption - dot size represents the GPU RAM usage (training)')
plt.xlabel("Average GPU power (W)")
plt.ylabel("Utilisation (%)")

# Print the memory figure next to each point, shifted by 2 along the x axis.
add_label(df_stats_train_tmp.gpu_power_w, df_stats_train_tmp.gpu_percent, df_stats_train_tmp.gpu_memory_used_b, plt.gca(), 2, 'GB')
plt.show()

In [None]:
# --- TOSH_stats -----------------------------------------------------------------
# Per-model averages over all training epochs. numeric_only=True keeps the aggregation
# working on pandas >= 2.0, where a non-numeric column makes mean() raise.
df_stats_train_tmp = df_stats_train.groupby('project_name').mean(numeric_only=True).reset_index()
# Energy estimate in Wh: (mean CPU delta power + mean GPU power / 1000 + 47)
# multiplied by the run time in hours.
# NOTE(review): 47 is an unexplained constant — presumably a baseline draw in watts; confirm.
df_stats_train_tmp['total'] = (df_stats_train_tmp['cpu_delta_power_w'] + (df_stats_train_tmp['gpu_power_w']/1000) + 47) * (df_stats_train_tmp['duration(s)']/1000*EPOCHS / 3600)

# --- Eco2AI ---------------------------------------------------------------------
# Sum of the reported consumption per experiment, converted from kWh to Wh.
df_stats_eco2ai_tmp = df_eco2ai_results_train.groupby('experiment_description').sum().reset_index()
df_stats_eco2ai_tmp['total'] = df_stats_eco2ai_tmp['power_consumption(kWh)'] * 1000

# --- CodeCarbon -----------------------------------------------------------------
# Several groupby columns must be selected with a list: selecting them with a bare
# tuple (``[...]['a', 'b', 'c']``) raises on pandas >= 2.0.
power_columns = ['cpu_power', 'gpu_power', 'ram_power']
df_codecarbon_tmp = df_codecarbon_results_train.groupby('project_name')[power_columns].mean().reset_index()
df_codecarbon_tmp2 = df_codecarbon_results_train.groupby('project_name')['duration'].first().reset_index()
df_codecarbon_tmp = df_codecarbon_tmp.merge(df_codecarbon_tmp2, on='project_name', how='left')

# --- Combined table: one row per model, one column per tool ----------------------
# 'CodeCarbon' is the largest energy_consumed value per model times 1000;
# 'CodeCarbon_correct' recomputes energy from the mean component powers, the first
# recorded duration and the number of epochs.
merged_duration_df = (df_codecarbon_results_train.groupby('project_name')['energy_consumed'].max()*1000).rename('CodeCarbon').reset_index()
merged_duration_df['CodeCarbon_correct'] = (df_codecarbon_tmp['cpu_power'] + df_codecarbon_tmp['gpu_power'] + df_codecarbon_tmp['ram_power']) * df_codecarbon_tmp['duration']*EPOCHS / 3600
# NOTE(review): the next two assignments align purely by row position. They are only
# correct if all three sources contain the same models in the same sorted order
# (Eco2AI is grouped by 'experiment_description', not 'project_name') — confirm.
merged_duration_df['TOSH_stats'] = df_stats_train_tmp['total']
merged_duration_df['Eco2AI'] = df_stats_eco2ai_tmp['total']


sns.set_theme(style="whitegrid", font_scale=2.5)

sns.catplot(x="project_name", y="value", hue="variable", kind="bar", errorbar=lambda x: (x.min(), x.max()), data=pd.melt(merged_duration_df, id_vars=['project_name']), height=16, aspect=2/1, legend_out=False)
plt.xticks(rotation=45)
# Wh is an energy unit (watt-hours), not "watts per hour".
plt.title('Watt-hours (Wh) from the different tools')
plt.xlabel("Models")
plt.ylabel("Watt-hours (Wh)")
plt.legend(loc='upper right', title='Experiment')
plt.show()