# This code is to get the labeled_energy_data csv file of each model

## Analyze the data; the content shown includes:
### 1. Every energy-consumption data point of each model over 5 epochs

## Import the lib

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
# import seaborn as sns
import numba as nb


## Define the interpolation and integration functions

In [2]:
@nb.jit(nopython=True)
def interpolate_point(times, powers, target_time):
    """Estimate the power reading at ``target_time``.

    ``times`` and ``powers`` are parallel 1-D sequences. ``times`` is
    searched with a binary search, so it is assumed to be sorted in
    ascending order.

    Returns:
        * ``0.0`` when there are no samples;
        * the first / last power when ``target_time`` lies at or beyond the
          first / last sample time (no extrapolation);
        * the stored power when a sample time equals ``target_time``;
        * otherwise the linear interpolation between the two neighbouring
          samples.
    """
    count = len(times)
    if count == 0:
        return 0.0

    # Outside the sampled range: clamp to the nearest end sample.
    last = count - 1
    if target_time <= times[0]:
        return powers[0]
    if target_time >= times[last]:
        return powers[last]

    # Bisect for target_time; when no sample matches exactly, `lo` ends up
    # on the first sample located after target_time.
    lo = 0
    hi = last
    while lo <= hi:
        mid = (lo + hi) // 2
        t_mid = times[mid]
        if t_mid == target_time:
            return powers[mid]
        if t_mid < target_time:
            lo = mid + 1
        else:
            hi = mid - 1

    # Interpolate linearly between the samples on either side of the target.
    t_before = times[lo - 1]
    p_before = powers[lo - 1]
    t_after = times[lo]
    p_after = powers[lo]
    fraction = (target_time - t_before) / (t_after - t_before)
    return p_before + (p_after - p_before) * fraction

@nb.jit(nopython=True)
def integrate_power_over_interval(samples, start_time, end_time):
    """Integrate power over ``[start_time, end_time]`` with the trapezoidal rule.

    ``samples`` is a 2-D array: column 0 holds the sample times and column 1
    the matching power readings. The power at the two interval ends comes
    from ``interpolate_point``; every sample whose time lies inside the
    closed interval is used as an interior node.

    Returns the accumulated power-times-time value (the energy, in whatever
    units the samples are expressed in).
    """
    times = samples[:, 0]
    powers = samples[:, 1]

    # Power at the interval boundaries.
    start_power = interpolate_point(times, powers, start_time)
    end_power = interpolate_point(times, powers, end_time)

    # Walk the nodes in order (start boundary, in-interval samples, end
    # boundary) and add one trapezoid per consecutive pair.
    total_energy = 0.0
    prev_time = start_time
    prev_power = start_power
    for k in range(len(times)):
        t = times[k]
        if t >= start_time and t <= end_time:
            p = powers[k]
            dt = t - prev_time
            avg_p = (prev_power + p) / 2.0
            total_energy += avg_p * dt
            prev_time = t
            prev_power = p

    # Closing trapezoid up to the end boundary.
    dt = end_time - prev_time
    avg_p = (prev_power + end_power) / 2.0
    total_energy += avg_p * dt

    return total_energy


## find the data path

In [3]:
# Locate the per-model result folders. The path is resolved relative to the
# current working directory, so this must be run from the directory that
# contains `ModelsData`.
current_path = os.getcwd()
data_path = os.path.join(current_path, 'ModelsData')
# os.listdir also returns plain files (e.g. macOS `.DS_Store`); keep only
# directories so that every entry can safely be treated as a model folder.
data_folders = [
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
]
print(data_folders)

['googlenet_origin', 'resnet50', 'resnet34', 'mobilenetv2', 'vgg13', 'googlenet_mod7', 'googlenet_mod9', 'googlenet_mod8', 'googlenet_mod1', 'googlenet_mod6', 'resnet18', 'mobilenetv1', 'vgg16', 'vgg11', 'googlenet_mod3', 'googlenet_mod4', 'googlenet_mod5', 'googlenet_mod2', 'alexnet']


## load all the model data

In [4]:
# Run settings. They identify which recorded run is analysed: the results
# folder name is assembled from these four values.
epoch: int = 5          # number of epochs in the run
batch_size: int = 128   # batch size of the run
round_num: int = 0      # round number of the run
sampling_rate: int = 2  # sampling rate setting of the run (units not shown here)

In [5]:
# # for all the folder names in the data folder, generate the path to the folder
# # and print the path
# folder_name = f'E{epoch}_B{batch_size}_R{round_num}_SR{sampling_rate}'
# print(folder_name)

# data_dir = 'fashion_mnist'
# # data_dir = 'cifar100'
# # data_dir = 'cifar10'

# for folder in data_folders:
#     folder_path = os.path.join(data_path, folder)
#     folder_path = os.path.join(folder_path, folder_name)
#     folder_path = os.path.join(folder_path, data_dir)
#     print(folder_path)
#     # show the files in the folder 
#     for file in os.listdir(folder_path):
#         print(file)

#     # read the data from the csv files and npy files
#     # load the csv files 
#     energy_data = pd.read_csv(os.path.join(folder_path, 'energy_consumption_file.csv'))
#     labeled_energy_data = pd.read_csv(os.path.join(folder_path, 'labeled_energy_data.csv'))

#     # load the npy files
#     to_device = np.load(os.path.join(folder_path, 'to_device.npy'), allow_pickle=True)
#     forward = np.load(os.path.join(folder_path, 'forward.npy'), allow_pickle=True)
#     loss = np.load(os.path.join(folder_path, 'loss.npy'), allow_pickle=True)
#     backward = np.load(os.path.join(folder_path, 'backward.npy'), allow_pickle=True)
#     optimize = np.load(os.path.join(folder_path, 'optimize.npy'), allow_pickle=True)

#     # create energy consumption data to save each step energy consumption
#     # use the ndarray to save the data, the shape is the same as the original data with the shape[2] changed to 3
#     # the first two value is the start time and end time, the third value is the energy consumption
#     to_device_energy = np.zeros((to_device.shape[0], to_device.shape[1], 3))
#     forward_energy = np.zeros((forward.shape[0], forward.shape[1], 3))
#     loss_energy = np.zeros((loss.shape[0], loss.shape[1], 3))
#     backward_energy = np.zeros((backward.shape[0], backward.shape[1], 3))
#     optimize_energy = np.zeros((optimize.shape[0], optimize.shape[1], 3))

#     # calculate the energy consumption for each step, 
#     # each runs 2 epochs, the first value of the shape is the epoch number
#     # the second value is number of batches
#     for epoch in range(to_device.shape[0]):
#         for batch in range(to_device.shape[1]):
#             to_device_energy[epoch][batch][0] = to_device[epoch][batch][0]
#             to_device_energy[epoch][batch][1] = to_device[epoch][batch][1]
#             to_device_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, to_device[epoch][batch][0], to_device[epoch][batch][1])

#             forward_energy[epoch][batch][0] = forward[epoch][batch][0]
#             forward_energy[epoch][batch][1] = forward[epoch][batch][1]
#             forward_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, forward[epoch][batch][0], forward[epoch][batch][1])

#             loss_energy[epoch][batch][0] = loss[epoch][batch][0]
#             loss_energy[epoch][batch][1] = loss[epoch][batch][1]
#             loss_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, loss[epoch][batch][0], loss[epoch][batch][1])

#             backward_energy[epoch][batch][0] = backward[epoch][batch][0]
#             backward_energy[epoch][batch][1] = backward[epoch][batch][1]
#             backward_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, backward[epoch][batch][0], backward[epoch][batch][1])

#             optimize_energy[epoch][batch][0] = optimize[epoch][batch][0]
#             optimize_energy[epoch][batch][1] = optimize[epoch][batch][1]
#             optimize_energy[epoch][batch][2] = integrate_power_over_interval(energy_data.values, optimize[epoch][batch][0], optimize[epoch][batch][1])
    


#     # save the energy consumption data to the folder
#     np.save(os.path.join(folder_path, 'to_device_energy.npy'), to_device_energy, allow_pickle=True)
#     np.save(os.path.join(folder_path, 'forward_energy.npy'), forward_energy, allow_pickle=True)
#     np.save(os.path.join(folder_path, 'loss_energy.npy'), loss_energy, allow_pickle=True)
#     np.save(os.path.join(folder_path, 'backward_energy.npy'), backward_energy, allow_pickle=True)
#     np.save(os.path.join(folder_path, 'optimize_energy.npy'), optimize_energy, allow_pickle=True)

In [6]:
# Compute the per-step and per-layer energy of one model run (alexnet) and
# save the results next to the input files.
#
# The run folder is <data_path>/<model>/<folder_name>/<data_dir>, where
# folder_name is built from the run settings `epoch`, `batch_size`,
# `round_num` and `sampling_rate`.
folder_name = f'E{epoch}_B{batch_size}_R{round_num}_SR{sampling_rate}_layer'
print(folder_name)
alexnet_index = data_folders.index('alexnet')
print(f"Index of 'alexnet': {alexnet_index}")

folder = data_folders[alexnet_index]

data_dir = 'fashion_mnist'
# data_dir = 'cifar100'
# data_dir = 'cifar10'

folder_path = os.path.join(data_path, folder, folder_name, data_dir)
print(folder_path)


def _step_energy(step_times, power_samples):
    """Return an array of ``[start, end, energy]`` for every batch of a step.

    ``step_times`` is indexed as ``step_times[epoch][batch]`` and its first
    two entries are read as the start and end time of that batch. The result
    has shape ``(step_times.shape[0], step_times.shape[1], 3)``; the third
    value is the power in ``power_samples`` integrated over the interval.
    """
    n_epochs, n_batches = step_times.shape[0], step_times.shape[1]
    result = np.zeros((n_epochs, n_batches, 3))
    # The loop indices are local to this helper, so the module-level `epoch`
    # setting (used to build folder_name) is no longer overwritten and the
    # cell can be re-run safely.
    for epoch_idx in range(n_epochs):
        for batch_idx in range(n_batches):
            start = step_times[epoch_idx][batch_idx][0]
            end = step_times[epoch_idx][batch_idx][1]
            result[epoch_idx, batch_idx, 0] = start
            result[epoch_idx, batch_idx, 1] = end
            result[epoch_idx, batch_idx, 2] = integrate_power_over_interval(
                power_samples, start, end
            )
    return result


# Load the CSV inputs of the run.
energy_data = pd.read_csv(os.path.join(folder_path, 'energy_consumption_file.csv'))
labeled_energy_data = pd.read_csv(os.path.join(folder_path, 'labeled_energy_data.csv'))
layer_time = pd.read_csv(os.path.join(folder_path, 'layer_time.csv'))

# Convert the power log to an array once instead of on every integration call.
power_samples = energy_data.values

# Load the per-step timing arrays.
to_device = np.load(os.path.join(folder_path, 'to_device.npy'), allow_pickle=True)
# forward = np.load(os.path.join(folder_path, 'forward.npy'), allow_pickle=True)
loss = np.load(os.path.join(folder_path, 'loss.npy'), allow_pickle=True)
backward = np.load(os.path.join(folder_path, 'backward.npy'), allow_pickle=True)
optimize = np.load(os.path.join(folder_path, 'optimize.npy'), allow_pickle=True)

# Energy of every step: for each (epoch, batch) the start time, the end time
# and the energy consumed in between.
to_device_energy = _step_energy(to_device, power_samples)
# forward_energy = _step_energy(forward, power_samples)
loss_energy = _step_energy(loss, power_samples)
backward_energy = _step_energy(backward, power_samples)
optimize_energy = _step_energy(optimize, power_samples)

# Per-layer energy. Every cell of layer_time is the text form of a dict
# mapping a layer name to its time period; the matching cell of layer_energy
# becomes a dict mapping the layer name to the time period (as a list) with
# the energy consumed during it appended.
# Cells are addressed by position; object dtype lets a cell hold a dict.
layer_energy = layer_time.astype(object)
for row in range(layer_energy.shape[0]):
    for col in range(layer_energy.shape[1]):
        # NOTE(review): eval() executes whatever text the CSV contains. If the
        # cells are plain Python literals, ast.literal_eval is the safer
        # choice - confirm the file format before switching.
        layer_batch = eval(layer_time.iat[row, col])
        layer_result = {}
        for layer, time_period in layer_batch.items():
            entry = list(time_period)
            start_time = entry[0]
            end_time = entry[1]
            entry.append(
                integrate_power_over_interval(power_samples, start_time, end_time)
            )
            layer_result[layer] = entry
        # .iat writes straight into the frame; chained `df.iloc[j][col] = ...`
        # may assign to a temporary copy and leave the frame unchanged.
        layer_energy.iat[row, col] = layer_result

layer_energy.to_csv(os.path.join(folder_path, 'layer_energy.csv'), index=False)

# Save the per-step energy arrays to the run folder.
np.save(os.path.join(folder_path, 'to_device_energy.npy'), to_device_energy, allow_pickle=True)
# np.save(os.path.join(folder_path, 'forward_energy.npy'), forward_energy, allow_pickle=True)
np.save(os.path.join(folder_path, 'loss_energy.npy'), loss_energy, allow_pickle=True)
np.save(os.path.join(folder_path, 'backward_energy.npy'), backward_energy, allow_pickle=True)
np.save(os.path.join(folder_path, 'optimize_energy.npy'), optimize_energy, allow_pickle=True)

E5_B128_R0_SR2_layer
Index of 'alexnet': 18
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/alexnet/E5_B128_R0_SR2_layer/fashion_mnist
