In [None]:
import sys
import os

module_path = os.path.abspath(os.path.join('..', 'utils'))
if module_path not in sys.path:
    sys.path.append(module_path)

import format_time as ft
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
from scipy.interpolate import griddata
from adjustText import adjust_text


# Experiment constants. EPOCHS scales the per-epoch duration when the
# estimated total energy is computed further down in this notebook.
EPOCHS = 200
SAMPLES = 50000

# Output directory for the generated figures. exist_ok=True makes the call
# idempotent and avoids the check-then-create race of exists() + makedirs().
FIGURES_PATH = '../figures/'
os.makedirs(FIGURES_PATH, exist_ok=True)

RESULTS_PATH = '../results_all/'
BATCH_PATH = RESULTS_PATH + 'batch_experiments/'

# Result containers filled by the loading loop, keyed by str(batch size).
df_stats_train = {}   # per-model averaged training statistics (DataFrame)
df_stats_test = {}    # per-model averaged testing statistics (DataFrame)
stats_train_all = {}  # raw training measurements loaded from stats_train.json
stats_test_all = {}   # raw testing measurements loaded from stats_test.json
model_size_all = {}   # content of model_size.json


def process_data(models, stats, df_stats, model_sizes=None):
    """Fill ``df_stats`` with per-epoch hardware metrics and model-size figures.

    For every model in ``models`` that has an entry in the size table, each
    epoch recorded in ``stats[model]`` is matched against the rows of
    ``df_stats`` whose ``project_name`` and ``epoch`` columns equal that
    model and ``int(epoch)``. On those rows the following columns are written:

    * ``start_time`` and ``stop_time``: copied as recorded;
    * the sampled CPU / GPU / RAM metrics: reduced with ``np.mean``;
    * ``size_mb``, ``parameters``, ``buffer``, ``macs`` and
      ``trainable_params``: copied from the size table.

    Args:
        models: Iterable of model names to process.
        stats: Mapping ``model -> epoch key -> recorded values``. Epoch keys
            must be convertible with ``int()``. It is modified in place:
            negative ``cpu_delta_power_w`` samples are removed before
            averaging.
        df_stats: DataFrame with ``project_name`` and ``epoch`` columns. It is
            modified in place.
        model_sizes: Optional mapping ``model -> size figures``. When omitted,
            the module-level ``model_size`` variable is used, which is the
            behaviour this function had before the parameter existed.

    Returns:
        The same ``df_stats`` object that was passed in.

    Raises:
        KeyError: If a model is missing from ``stats`` or an expected metric
            is missing from a recorded epoch or from the size table.
    """
    if model_sizes is None:
        # Backward-compatible fallback on the notebook global that the
        # loading loop assigns right before calling this function.
        model_sizes = model_size  # noqa: F821

    # Columns that hold a list of samples per epoch and are averaged.
    averaged_metrics = (
        'cpu_energy_uj',
        'cpu_delta_power_w',
        'cpu_percent',
        'cpu_memory_percent',
        'cpu_temperature_c',
        'gpu_power_w',
        'gpu_temperature_c',
        'gpu_memory_free_b',
        'gpu_memory_used_b',
        'gpu_percent',
        'ram_power_w',
    )
    # Columns copied unchanged from the size table.
    size_fields = ('size_mb', 'parameters', 'buffer', 'macs', 'trainable_params')

    for model in models:
        per_epoch = stats[model]
        if model not in model_sizes:
            # Models without size information are left untouched.
            continue
        sizes = model_sizes[model]

        for epoch, record in per_epoch.items():
            # Negative power deltas are discarded before averaging.
            record['cpu_delta_power_w'] = [
                watts for watts in record['cpu_delta_power_w'] if watts >= 0
            ]

            # The row selection only depends on model and epoch, so it is
            # computed once instead of once per written column.
            rows = (df_stats['project_name'] == model) & (df_stats['epoch'] == int(epoch))

            df_stats.loc[rows, 'start_time'] = record['start_time']
            df_stats.loc[rows, 'stop_time'] = record['stop_time']
            for metric in averaged_metrics:
                df_stats.loc[rows, metric] = np.mean(record[metric])
            for field in size_fields:
                df_stats.loc[rows, field] = sizes[field]

    return df_stats


# Load every batch-size experiment stored under BATCH_PATH. Each sub-folder
# named "results_<batch size>" yields per-model averages plus an estimated
# total energy, stored in the module-level dicts under str(<batch size>).
batch_sizes = []
for folder in os.listdir(BATCH_PATH):
    if not folder.startswith("results_"):
        continue

    # The folder suffix is the batch size of that run.
    number = int(folder[len("results_"):])
    batch_sizes.append(number)
    run_dir = os.path.join(BATCH_PATH, folder)

    df_stats_train_tmp = pd.read_csv(os.path.join(run_dir, 'mlresults_stats_train.csv'))
    df_stats_train_tmp['duration(s)'] = df_stats_train_tmp['duration(s)'].apply(ft.plot_time)
    df_stats_train_tmp['step(ms)'] = df_stats_train_tmp['step(ms)'].apply(ft.plot_time)

    df_stats_test_tmp = pd.read_csv(os.path.join(run_dir, 'mlresults_stats_test.csv'))
    df_stats_test_tmp['duration(s)'] = df_stats_test_tmp['duration(s)'].apply(ft.plot_time)
    df_stats_test_tmp['step(ms)'] = df_stats_test_tmp['step(ms)'].apply(ft.plot_time)

    # Only the first experiment ('exp_0') of each JSON file is used.
    with open(os.path.join(run_dir, 'stats_train.json')) as f:
        stats_train = json.load(f)['exp_0']
    stats_train_all[str(number)] = stats_train

    with open(os.path.join(run_dir, 'stats_test.json')) as f:
        stats_test = json.load(f)['exp_0']
    stats_test_all[str(number)] = stats_test

    # process_data() reads this module-level name, so it must be assigned
    # before the calls below.
    with open(os.path.join(run_dir, 'model_size.json')) as f:
        model_size = json.load(f)
    model_size_all[str(number)] = model_size

    # The model list of the training CSV drives both the train and test passes.
    models = df_stats_train_tmp.project_name.unique()

    # numeric_only=True: pandas >= 2.0 raises TypeError when averaging a
    # non-numeric column, whereas older versions silently dropped it.
    df_stats_train_tmp = process_data(models, stats_train, df_stats_train_tmp)
    df_stats_train_tmp = df_stats_train_tmp.groupby('project_name').mean(numeric_only=True).reset_index()
    # Estimated energy = summed average power * total run time in hours.
    # NOTE(review): gpu_power_w / 1000 presumably converts mW to W and
    # 'duration(s)' / 1000 presumably converts ms to s - TODO confirm units.
    df_stats_train_tmp['total'] = (
        df_stats_train_tmp['cpu_delta_power_w']
        + df_stats_train_tmp['gpu_power_w'] / 1000
        + df_stats_train_tmp['ram_power_w']
    ) * (df_stats_train_tmp['duration(s)'] / 1000 * EPOCHS / 3600)
    df_stats_train[str(number)] = df_stats_train_tmp

    df_stats_test_tmp = process_data(models, stats_test, df_stats_test_tmp)
    df_stats_test_tmp = df_stats_test_tmp.groupby('project_name').mean(numeric_only=True).reset_index()
    # Same formula as for training, including the EPOCHS factor.
    df_stats_test_tmp['total'] = (
        df_stats_test_tmp['cpu_delta_power_w']
        + df_stats_test_tmp['gpu_power_w'] / 1000
        + df_stats_test_tmp['ram_power_w']
    ) * (df_stats_test_tmp['duration(s)'] / 1000 * EPOCHS / 3600)
    df_stats_test[str(number)] = df_stats_test_tmp


# os.listdir() returns entries in arbitrary order; sort for the plots.
batch_sizes = sorted(batch_sizes)

In [None]:
# Grouped bar charts of the estimated total energy per model and batch size:
# one figure for training, one for testing, covering every model.
# NOTE(review): a later cell of this notebook saves its figures under the
# same two file names, so these PNGs are overwritten on a full run.
for phase, stats_by_batch in (('training', df_stats_train), ('testing', df_stats_test)):
    # Collect the per-batch frames and concatenate once instead of growing a
    # DataFrame with pd.concat on every iteration.
    frames = []
    for batch_size in batch_sizes:
        temp_df = stats_by_batch[str(batch_size)][['project_name', 'total']].copy()
        temp_df['batch_size'] = batch_size
        frames.append(temp_df)
    final_df = pd.concat(frames, ignore_index=True)

    sns.set(rc={'figure.figsize': (11.7, 5.27)}, style="whitegrid")
    ax = sns.barplot(x="batch_size", y="total", data=final_df, hue="project_name")
    plt.title(f'Total power consumption as a function of the batch size ({phase})')
    plt.xlabel("Batch Size")
    plt.ylabel("Total power consumption (Wh)")
    # Anchor the legend outside the axes, to the right of the bars.
    plt.legend(title='Model', bbox_to_anchor=(1.2, 0.9))

    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_PATH, f'{phase}_batch_total_power.png'), bbox_inches='tight', format='png', dpi=400)
    plt.show()

In [None]:
# Same total-energy bar charts, restricted to a hand-picked subset of models
# and drawn with a larger font.
# NOTE(review): the output file names are the ones an earlier cell already
# uses for the all-model figures, so those files are overwritten here.
models = ['VGG', 'DPN', 'RegNet', 'SimpleDLA', 'DenseNet']

for phase, stats_by_batch in (('training', df_stats_train), ('testing', df_stats_test)):
    frames = []
    for batch_size in batch_sizes:
        temp_df = stats_by_batch[str(batch_size)][['project_name', 'total']]
        # .copy() after filtering so the column assignment below works on an
        # independent frame and cannot trigger SettingWithCopyWarning.
        temp_df = temp_df[temp_df['project_name'].isin(models)].copy()
        temp_df['batch_size'] = batch_size
        frames.append(temp_df)
    # One concat at the end instead of one per batch size.
    final_df = pd.concat(frames, ignore_index=True)

    sns.set(rc={'figure.figsize': (11.7, 4.27)}, style="whitegrid", font_scale=1.3)
    ax = sns.barplot(x="batch_size", y="total", data=final_df, hue="project_name")
    plt.title(f'Total power consumption as a function of the batch size ({phase})')
    plt.xlabel("Batch Size")
    plt.ylabel("Total power consumption (Wh)")
    # Anchor the legend outside the axes, to the right of the bars.
    plt.legend(title='Model', bbox_to_anchor=(1.23, 0.75))

    plt.tight_layout()
    plt.savefig(os.path.join(FIGURES_PATH, f'{phase}_batch_total_power.png'), bbox_inches='tight', format='png', dpi=400)
    plt.show()

In [None]:
# Grouped bar charts of the average GPU utilisation per model and batch size
# for a hand-picked subset of models: one figure for training, one for testing.
models = ['VGG', 'DPN', 'RegNet', 'SimpleDLA', 'DenseNet']

for phase, stats_by_batch in (('training', df_stats_train), ('testing', df_stats_test)):
    frames = []
    for batch_size in batch_sizes:
        temp_df = stats_by_batch[str(batch_size)][['project_name', 'gpu_percent']]
        # .copy() after filtering so the column assignment below works on an
        # independent frame and cannot trigger SettingWithCopyWarning.
        temp_df = temp_df[temp_df['project_name'].isin(models)].copy()
        temp_df['batch_size'] = batch_size
        frames.append(temp_df)
    # One concat at the end instead of one per batch size.
    final_df = pd.concat(frames, ignore_index=True)

    sns.set(rc={'figure.figsize': (11.7, 4.27)}, style="whitegrid", font_scale=1.3)
    ax = sns.barplot(x="batch_size", y="gpu_percent", data=final_df, hue="project_name")
    plt.title(f'GPU Utilisation as a function of the batch size ({phase})')
    plt.xlabel("Batch Size")
    plt.ylabel("GPU Utilisation (%)")
    # Anchor the legend outside the axes, to the right of the bars.
    plt.legend(title='Model', bbox_to_anchor=(1.23, 0.75))

    plt.tight_layout()
    # These figures used to be written to '<phase>_batch_total_power.png',
    # which replaced the total-energy figures with GPU-utilisation plots.
    plt.savefig(os.path.join(FIGURES_PATH, f'{phase}_batch_gpu_utilisation.png'), bbox_inches='tight', format='png', dpi=400)
    plt.show()