# This code is to get the labeled_energy_data csv file of each model

## Import the lib

In [1]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
from Functions.LabelEnergyAnalysis import *

## find the data path

In [2]:
current_path = os.getcwd()
data_path = os.path.join(current_path, 'CNNModelsData')
data_folders = os.listdir(data_path)
print(data_folders)

# create a dictionary to store the data for the database
if not os.path.exists('Database'):
    os.makedirs('Database')

database_dir = os.path.join(current_path, 'Database')
print(f'The database path is {database_dir}')

['googlenet_origin', 'resnet50', 'resnet34', 'mobilenetv2', 'vgg13', 'googlenet_mod7', 'googlenet_mod9', 'googlenet_mod8', 'googlenet_mod1', 'googlenet_mod6', 'resnet18', 'mobilenetv1', 'vgg16', 'vgg11', 'googlenet_mod3', 'googlenet_mod4', 'googlenet_mod5', 'googlenet_mod2', 'alexnet']
The database path is /Users/dtjgp/Projects/GreenAI/4090/Database


## load all the model data

In [3]:
# set the epoch number, batch size and the round number as well as the sampling rate
epoch = 5
batch_size = 128
round_num = 0
sampling_rate = 2

In [4]:
# for all the folder names in the data folder, generate the path to the folder
# and print the path
folder_name = f'E{epoch}_B{batch_size}_R{round_num}_SR{sampling_rate}'
print(f'The folder name is {folder_name}')

# data_dir_list = ['fashion_mnist', 'cifar100']
data_dir_list = ['cifar100']


for dataset in data_dir_list:
    for folder in data_folders:
        folder_path = os.path.join(data_path, folder)
        folder_path = os.path.join(folder_path, dataset)
        folder_path = os.path.join(folder_path, folder_name)
        print(folder_path)

        # create a directory to store the database of each model in the database folder
        database_path = os.path.join(database_dir, folder)
        database_path = os.path.join(database_path, dataset)
        database_path = os.path.join(database_path, folder_name)

        # create the directory if it does not exist
        if not os.path.exists(database_path):
            os.makedirs(database_path)

        # load the csv files 
        energy_data = pd.read_csv(os.path.join(folder_path, 'energy_consumption_file.csv'))
        energy_data['timestamp'] = pd.to_numeric(energy_data['timestamp'], errors='coerce')

        # load the npy files
        to_device = np.load(os.path.join(folder_path, 'to_device.npy'), allow_pickle=True)
        forward = np.load(os.path.join(folder_path, 'forward.npy'), allow_pickle=True)
        loss = np.load(os.path.join(folder_path, 'loss.npy'), allow_pickle=True)
        backward = np.load(os.path.join(folder_path, 'backward.npy'), allow_pickle=True)
        optimize = np.load(os.path.join(folder_path, 'optimize.npy'), allow_pickle=True)

        # create energy consumption data to save each step energy consumption
        # use the ndarray to save the data, the shape is the same as the original data with the shape[2] changed to 3
        # the first two value is the start time and end time, the third value is the energy consumption
        to_device_energy = np.zeros((to_device.shape[0], to_device.shape[1], 3))
        forward_energy = np.zeros((forward.shape[0], forward.shape[1], 3))
        loss_energy = np.zeros((loss.shape[0], loss.shape[1], 3))
        backward_energy = np.zeros((backward.shape[0], backward.shape[1], 3))
        optimize_energy = np.zeros((optimize.shape[0], optimize.shape[1], 3))

        to_device_df = transfer_type(to_device)
        forward_df = transfer_type(forward)
        loss_df = transfer_type(loss)
        backward_df = transfer_type(backward)
        optimize_df = transfer_type(optimize)

        # calculate the energy consumption for each step, 
        # each runs 2 epochs, the first value of the shape is the epoch number
        # the second value is number of batches
        for epoch in range(to_device.shape[0]): # for each epoch
            for batch in range(to_device.shape[1]): # for each batch
                to_device_energy[epoch][batch][0] = to_device[epoch][batch][0]
                to_device_energy[epoch][batch][1] = to_device[epoch][batch][1]
                # # check if there is any 0 value in the data
                # if to_device[epoch][batch][0] == 0 or to_device[epoch][batch][1] == 0:
                #     print(f'The value is 0 in the to_device data, the epoch is {epoch} and the batch is {batch}')
                to_device_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, to_device[epoch][batch][0], to_device[epoch][batch][1])
                # add the value to the dataframe
                to_device_df.iloc[batch][epoch].append(to_device_energy[epoch][batch][2])
                # to_device_df.loc[(to_device_df['epoch'] == epoch) & (to_device_df['batch'] == batch), 'energy'] = to_device_energy[epoch][batch][2]

                forward_energy[epoch][batch][0] = forward[epoch][batch][0]
                forward_energy[epoch][batch][1] = forward[epoch][batch][1]
                forward_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, forward[epoch][batch][0], forward[epoch][batch][1])
                # add the value to the dataframe
                forward_df.iloc[batch][epoch].append(forward_energy[epoch][batch][2])
                # forward_df.loc[(forward_df['epoch'] == epoch) & (forward_df['batch'] == batch), 'energy'] = forward_energy[epoch][batch][2]

                loss_energy[epoch][batch][0] = loss[epoch][batch][0]
                loss_energy[epoch][batch][1] = loss[epoch][batch][1]
                loss_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, loss[epoch][batch][0], loss[epoch][batch][1])
                # add the value to the dataframe
                loss_df.iloc[batch][epoch].append(loss_energy[epoch][batch][2])
                # loss_df.loc[(loss_df['epoch'] == epoch) & (loss_df['batch'] == batch), 'energy'] = loss_energy[epoch][batch][2]

                backward_energy[epoch][batch][0] = backward[epoch][batch][0]
                backward_energy[epoch][batch][1] = backward[epoch][batch][1]
                backward_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, backward[epoch][batch][0], backward[epoch][batch][1])
                # add the value to the dataframe
                backward_df.iloc[batch][epoch].append(backward_energy[epoch][batch][2])
                # backward_df.loc[(backward_df['epoch'] == epoch) & (backward_df['batch'] == batch), 'energy'] = backward_energy[epoch][batch][2]

                optimize_energy[epoch][batch][0] = optimize[epoch][batch][0]
                optimize_energy[epoch][batch][1] = optimize[epoch][batch][1]
                optimize_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, optimize[epoch][batch][0], optimize[epoch][batch][1])
                # add the value to the dataframe
                optimize_df.iloc[batch][epoch].append(optimize_energy[epoch][batch][2])
                # optimize_df.loc[(optimize_df['epoch'] == epoch) & (optimize_df['batch'] == batch), 'energy'] = optimize_energy[epoch][batch][2]

        # save the energy consumption data to the folder
        np.save(os.path.join(folder_path, 'to_device_energy.npy'), to_device_energy, allow_pickle=True)
        np.save(os.path.join(folder_path, 'forward_energy.npy'), forward_energy, allow_pickle=True)
        np.save(os.path.join(folder_path, 'loss_energy.npy'), loss_energy, allow_pickle=True)
        np.save(os.path.join(folder_path, 'backward_energy.npy'), backward_energy, allow_pickle=True)
        np.save(os.path.join(folder_path, 'optimize_energy.npy'), optimize_energy, allow_pickle=True)

        # Set the display format for floating-point numbers to avoid scientific notation
        pd.options.display.float_format = '{:.6f}'.format

        # Use the function to label the energy consumption data
        labeled_energy_data = label_energy_consumption(energy_data, to_device, forward, loss, backward, optimize)
        # print(labeled_energy_data.head())

        # save the file to the folder
        labeled_energy_data.to_csv(os.path.join(folder_path, 'labeled_energy_data.csv'), index=False)

        # save all the dataframe data to the database
        to_device_df.to_csv(os.path.join(database_path, 'to_device.csv'), index=False)
        forward_df.to_csv(os.path.join(database_path, 'forward.csv'), index=False)
        loss_df.to_csv(os.path.join(database_path, 'loss.csv'), index=False)
        backward_df.to_csv(os.path.join(database_path, 'backward.csv'), index=False)
        optimize_df.to_csv(os.path.join(database_path, 'optimize.csv'), index=False)
        labeled_energy_data.to_csv(os.path.join(database_path, 'labeled_energy_data.csv'), index=False)
        

The folder name is E5_B128_R0_SR2
/Users/dtjgp/Projects/GreenAI/4090/CNNModelsData/googlenet_origin/cifar100/E5_B128_R0_SR2
/Users/dtjgp/Projects/GreenAI/4090/CNNModelsData/resnet50/cifar100/E5_B128_R0_SR2
/Users/dtjgp/Projects/GreenAI/4090/CNNModelsData/resnet34/cifar100/E5_B128_R0_SR2
/Users/dtjgp/Projects/GreenAI/4090/CNNModelsData/mobilenetv2/cifar100/E5_B128_R0_SR2


FileNotFoundError: [Errno 2] No such file or directory: '/Users/dtjgp/Projects/GreenAI/4090/CNNModelsData/mobilenetv2/cifar100/E5_B128_R0_SR2/energy_consumption_file.csv'