# Generate the labeled energy-data CSV file (`labeled_energy_data.csv`, or optionally `labeled_energy_data_layer.csv`) for each model run

## Import the libraries

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns
import numba as nb

## Define the helper functions

### interpolate function

In [13]:
@nb.jit(nopython=True)
def interpolate_point(times, powers, target_time):
    """Return the power at ``target_time`` by linear interpolation.

    ``times`` and ``powers`` are parallel sequences. The bisection below only
    gives a meaningful answer when ``times`` is in ascending order.

    Returns:
        * ``0.0`` when there are no samples,
        * ``powers[0]`` / ``powers[-1]`` when ``target_time`` lies at or
          outside the first / last sample time,
        * the sampled power when a sample time equals ``target_time``,
        * otherwise the value on the straight line between the two samples
          that bracket ``target_time``.
    """
    count = len(times)
    if count == 0:
        return 0.0

    # Outside (or exactly on) the sampled range: clamp to the end samples.
    if target_time <= times[0]:
        return powers[0]
    if target_time >= times[-1]:
        return powers[-1]

    # Bisection over the closed index range [lo, hi].
    lo = 0
    hi = count - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        t_mid = times[mid]
        if t_mid == target_time:
            return powers[mid]
        if t_mid < target_time:
            lo = mid + 1
        else:
            hi = mid - 1

    # The search ended without an exact hit; for ascending times, ``lo`` now
    # indexes the first sample later than target_time and ``lo - 1`` the last
    # sample earlier than it.
    t_before = times[lo - 1]
    p_before = powers[lo - 1]
    t_after = times[lo]
    p_after = powers[lo]
    fraction = (target_time - t_before) / (t_after - t_before)
    return p_before + (p_after - p_before) * fraction

@nb.jit(nopython=True)
def integrate_power_over_interval(samples, start_time, end_time):
    """Integrate power over ``[start_time, end_time]`` with the trapezoidal rule.

    ``samples`` is indexed as a 2-D array: column 0 holds the sample times and
    column 1 the sample powers. The power at both interval ends is obtained
    from ``interpolate_point``; every sample whose time lies inside the
    interval (bounds included) is used as an interior node.

    Returns the accumulated ``power * time`` over the interval.
    """
    sample_times = samples[:, 0]
    sample_powers = samples[:, 1]

    # Power at the two interval ends.
    power_at_start = interpolate_point(sample_times, sample_powers, start_time)
    power_at_end = interpolate_point(sample_times, sample_powers, end_time)

    # Samples that fall inside the interval.
    inside = (sample_times >= start_time) & (sample_times <= end_time)
    inner_times = sample_times[inside]
    inner_powers = sample_powers[inside]
    n_inner = len(inner_times)

    # Node arrays: [start, interior samples..., end].
    node_times = np.zeros(n_inner + 2)
    node_powers = np.zeros(n_inner + 2)
    node_times[0] = start_time
    node_powers[0] = power_at_start
    if n_inner > 0:
        node_times[1:-1] = inner_times
        node_powers[1:-1] = inner_powers
    node_times[-1] = end_time
    node_powers[-1] = power_at_end

    # Sum the trapezoid between every pair of neighbouring nodes.
    energy = 0.0
    for k in range(1, len(node_times)):
        width = node_times[k] - node_times[k - 1]
        mean_power = (node_powers[k - 1] + node_powers[k]) / 2.0
        energy += mean_power * width

    return energy


### label energy function

In [14]:
def label_energy_consumption(energy_data, to_device, forward, loss, backward, optimize):
    """Return a copy of ``energy_data`` with a ``'step'`` column added.

    Every row starts as ``'idle'``. Each of the five step arrays is indexed as
    ``array[epoch][batch][0]`` (window start) and ``array[epoch][batch][1]``
    (window end); rows whose ``'timestamp'`` lies inside a window (bounds
    included) receive that step's name.

    Steps are applied in the order to_device, forward, loss, backward,
    optimize, so where windows overlap the later step wins. The input
    dataframe itself is not modified.
    """
    labeled = energy_data.copy()
    labeled['step'] = 'idle'

    # (label, time windows) pairs, in the order they are written.
    phases = (
        ('to_device', to_device),
        ('forward', forward),
        ('loss', loss),
        ('backward', backward),
        ('optimize', optimize),
    )

    for phase_name, windows in phases:
        for epoch_idx in range(windows.shape[0]):
            for batch_idx in range(windows.shape[1]):
                window = windows[epoch_idx][batch_idx]
                begin, finish = window[0], window[1]
                # between() is inclusive on both ends: begin <= t <= finish
                in_window = labeled['timestamp'].between(begin, finish)
                labeled.loc[in_window, 'step'] = phase_name

    return labeled

In [15]:
def label_energy_consumption_layer(energy_data, to_device, loss, backward, optimize, layer_energy):
    """Return a copy of ``energy_data`` labeled per step, with the forward pass split by layer.

    Every row starts with ``'step' == 'idle'``. The step arrays (to_device,
    loss, backward, optimize) are indexed as ``array[epoch][batch][0/1]`` for
    the start/end of each window. ``layer_energy`` is a dataframe whose
    columns are named ``'0'``, ``'1'``, ...; each cell is the string form of a
    dict mapping a layer name to a ``(start, end)`` pair.

    Labels are written in the order to_device, layers, loss, backward,
    optimize, so where windows overlap the later label wins. Window bounds are
    inclusive. The input dataframe itself is not modified.
    """
    labeled = energy_data.copy()
    labeled['step'] = 'idle'

    def mark(begin, finish, label):
        # Tag every sample with begin <= timestamp <= finish.
        hit = labeled['timestamp'].between(begin, finish)
        labeled.loc[hit, 'step'] = label

    def mark_phase(windows, label):
        # One window per (epoch, batch) pair.
        for epoch_idx in range(windows.shape[0]):
            for batch_idx in range(windows.shape[1]):
                window = windows[epoch_idx][batch_idx]
                mark(window[0], window[1], label)

    def mark_layers():
        # Columns are visited first, then the rows inside each column.
        for col in range(layer_energy.shape[1]):
            for row in range(layer_energy.shape[0]):
                cell = layer_energy.iloc[row][str(col)]
                # NOTE(review): eval() executes whatever expression the CSV
                # cell contains. Only feed this function files you produced
                # yourself; consider a safe parser if the cells are plain
                # literals.
                spans = eval(cell)
                for layer_name, span in spans.items():
                    mark(span[0], span[1], layer_name)

    mark_phase(to_device, 'to_device')
    mark_layers()
    mark_phase(loss, 'loss')
    mark_phase(backward, 'backward')
    mark_phase(optimize, 'optimize')

    return labeled

### load the label function

## find the data path

In [16]:
# Resolve the resnet18 measurement directory relative to the current working
# directory, so this cell must be run from the directory that contains
# ModelsData/.
current_path = os.getcwd()
data_path = os.path.join(current_path, 'ModelsData/resnet18')
# Every entry found directly under data_path (run folders and anything else
# stored there); the next cell narrows this list down.
data_folders = os.listdir(data_path)
print(data_folders)

['E5_B128_R0_SR2_performance_140', 'E5_B128_R0_SR2_performance_310', 'E5_B128_R0_SR2_performance_170', 'E5_B128_R0_SR2_performance_320', 'E5_B128_R0_SR2_performance_290', 'E5_B128_R0_SR2_performance_230', 'E5_B128_R0_SR2_performance_200', 'fashion_mnist', 'E5_B128_R0_SR2_performance_240', 'E5_B128_R0_SR2_performance_270', 'E5_B128_R0_SR2_performance_130', 'E5_B128_R0_SR2_performance_190', 'E5_B128_R0_SR2_performance_100', 'E5_B128_R0_SR2_performance_110', 'E5_B128_R0_SR2_performance_120', 'E5_B128_R0_SR2_performance_180', 'E5_B128_R0_SR2_performance_260', 'E5_B128_R0_SR2_layer', 'E5_B128_R0_SR2_performance_250', 'E5_B128_R0_SR2_performance_210', 'E5_B128_R0_SR2_performance_280', 'E5_B128_R0_SR2_performance_220', 'E5_B128_R0_SR2_performance_160', 'E5_B128_R0_SR2_performance_150', 'E5_B128_R0_SR2_performance_300']


In [17]:
# Keep only the entries whose name contains 'performance'
performance_folders = [name for name in data_folders if 'performance' in name]
print(performance_folders)

# Order the runs numerically by the number after the last underscore of the
# folder name (a plain string sort would not order numbers of different
# lengths correctly)
performance_folders = sorted(
    performance_folders,
    key=lambda name: int(name.rsplit('_', 1)[-1]),
)
print(performance_folders)

# From here on, data_folders refers to the sorted performance runs only
data_folders = performance_folders

['E5_B128_R0_SR2_performance_140', 'E5_B128_R0_SR2_performance_310', 'E5_B128_R0_SR2_performance_170', 'E5_B128_R0_SR2_performance_320', 'E5_B128_R0_SR2_performance_290', 'E5_B128_R0_SR2_performance_230', 'E5_B128_R0_SR2_performance_200', 'E5_B128_R0_SR2_performance_240', 'E5_B128_R0_SR2_performance_270', 'E5_B128_R0_SR2_performance_130', 'E5_B128_R0_SR2_performance_190', 'E5_B128_R0_SR2_performance_100', 'E5_B128_R0_SR2_performance_110', 'E5_B128_R0_SR2_performance_120', 'E5_B128_R0_SR2_performance_180', 'E5_B128_R0_SR2_performance_260', 'E5_B128_R0_SR2_performance_250', 'E5_B128_R0_SR2_performance_210', 'E5_B128_R0_SR2_performance_280', 'E5_B128_R0_SR2_performance_220', 'E5_B128_R0_SR2_performance_160', 'E5_B128_R0_SR2_performance_150', 'E5_B128_R0_SR2_performance_300']
['E5_B128_R0_SR2_performance_100', 'E5_B128_R0_SR2_performance_110', 'E5_B128_R0_SR2_performance_120', 'E5_B128_R0_SR2_performance_130', 'E5_B128_R0_SR2_performance_140', 'E5_B128_R0_SR2_performance_150', 'E5_B128_R0_

## load all the model data

In [18]:
# Experiment settings: epoch number, batch size, round number and sampling
# rate. The values match the E5_B128_R0_SR2 prefix of the run folder names
# printed above; in the visible part of this notebook they are only read by
# the commented-out cells that build `folder_name`.
epoch = 5
batch_size = 128
round_num = 0
sampling_rate = 2

In [19]:
# # for all the folder names in the data folder, generate the path to the folder
# # and print the path
# folder_name = f'E{epoch}_B{batch_size}_R{round_num}_SR{sampling_rate}_layer'

# data_dir = 'fashion_mnist'
# # data_dir = 'cifar100'
# # data_dir = 'cifar10'



# print(folder_name)
# for folder in data_folders:
#     folder_path = os.path.join(data_path, folder)
#     folder_path = os.path.join(folder_path, folder_name)
#     folder_path = os.path.join(folder_path, data_dir)
#     print(folder_path)

#     # load the csv files 
#     energy_data = pd.read_csv(os.path.join(folder_path, 'energy_consumption_file.csv'))

#     # load the npy files
#     to_device = np.load(os.path.join(folder_path, 'to_device.npy'), allow_pickle=True)
#     forward = np.load(os.path.join(folder_path, 'forward.npy'), allow_pickle=True)
#     loss = np.load(os.path.join(folder_path, 'loss.npy'), allow_pickle=True)
#     backward = np.load(os.path.join(folder_path, 'backward.npy'), allow_pickle=True)
#     optimize = np.load(os.path.join(folder_path, 'optimize.npy'), allow_pickle=True)

#     # Set the display format for floating-point numbers to avoid scientific notation
#     pd.options.display.float_format = '{:.6f}'.format

#     # Use the function to label the energy consumption data
#     labeled_energy_data = label_energy_consumption(energy_data, to_device, forward, loss, backward, optimize)
#     print(labeled_energy_data.head())

#     # save the file to the folder
#     labeled_energy_data.to_csv(os.path.join(folder_path, 'labeled_energy_data.csv'), index=False)

In [20]:
# # for all the folder names in the data folder, generate the path to the folder
# # Find the index of 'alexnet' in data_folders
# # alexnet_index = data_folders.index('alexnet')
# resnet18_index = data_folders.index('resnet18')

# # print(f"Index of 'alexnet': {alexnet_index}")
# print(f"Index of 'resnet18': {resnet18_index}")

# # and print the path
# folder_name = f'E{epoch}_B{batch_size}_R{round_num}_SR{sampling_rate}_layer'

# data_dir = 'fashion_mnist'
# # data_dir = 'cifar100'
# # data_dir = 'cifar10'
# print(folder_name)

# folder = data_folders[resnet18_index]

# folder_path = os.path.join(data_path, folder)
# folder_path = os.path.join(folder_path, folder_name)
# folder_path = os.path.join(folder_path, data_dir)
# # print(folder_path)

# print(folder_path)

# # load the csv files 
# energy_data = pd.read_csv(os.path.join(folder_path, 'energy_consumption_file.csv'))
# # print(type(energy_data.iloc[0]['timestamp']))
# energy_data['timestamp'] = pd.to_numeric(energy_data['timestamp'], errors='coerce')



# forward_layer_data = pd.read_csv(os.path.join(folder_path, 'layer_time.csv'))

# # load the npy files
# to_device = np.load(os.path.join(folder_path, 'to_device.npy'), allow_pickle=True)
# forward = np.load(os.path.join(folder_path, 'forward.npy'), allow_pickle=True)
# loss = np.load(os.path.join(folder_path, 'loss.npy'), allow_pickle=True)
# backward = np.load(os.path.join(folder_path, 'backward.npy'), allow_pickle=True)
# optimize = np.load(os.path.join(folder_path, 'optimize.npy'), allow_pickle=True)



# # Set the display format for floating-point numbers to avoid scientific notation
# pd.options.display.float_format = '{:.6f}'.format


# # Use the function to label the energy consumption data
# labeled_energy_data = label_energy_consumption(energy_data, to_device, forward, loss, backward, optimize)


# # Use the function to label the energy consumption data with the layer names
# # labeled_energy_data_layer = label_energy_consumption_layer(energy_data, to_device, loss, backward, optimize, forward_layer_data)
# # print(labeled_energy_data_layer.head())

# # save the file to the folder
# # labeled_energy_data_layer.to_csv(os.path.join(folder_path, 'labeled_energy_data_layer.csv'), index=False)
# labeled_energy_data.to_csv(os.path.join(folder_path, 'labeled_energy_data.csv'), index=False)

In [25]:
# for all the folder names in the data folder, generate the path to the folder
# Find the index of 'alexnet' in data_folders
# alexnet_index = data_folders.index('alexnet')
# resnet18_index = data_folders.index('resnet18')

# # print(f"Index of 'alexnet': {alexnet_index}")
# print(f"Index of 'resnet18': {resnet18_index}")

# # and print the path
# folder_name = f'E{epoch}_B{batch_size}_R{round_num}_SR{sampling_rate}_layer'

# Dataset sub-folder that is read inside every run folder
data_dir = 'fashion_mnist'
# # data_dir = 'cifar100'
# # data_dir = 'cifar10'

# Label the energy samples of every run folder and store the result next to
# the raw data of that run.
for folder in data_folders:
    folder_path = os.path.join(data_path, folder, data_dir)
    print(folder_path)

    # Raw energy samples; timestamps that cannot be parsed as numbers become NaN
    energy_data = pd.read_csv(os.path.join(folder_path, 'energy_consumption_file.csv'))
    energy_data['timestamp'] = pd.to_numeric(energy_data['timestamp'], errors='coerce')

    # Per-layer timings; only consumed by the disabled layer-labeling variant below
    forward_layer_data = pd.read_csv(os.path.join(folder_path, 'layer_time.csv'))

    # Time windows of each training step, one .npy file per step
    to_device, forward, loss, backward, optimize = (
        np.load(os.path.join(folder_path, npy_name), allow_pickle=True)
        for npy_name in (
            'to_device.npy',
            'forward.npy',
            'loss.npy',
            'backward.npy',
            'optimize.npy',
        )
    )

    # Set the display format for floating-point numbers to avoid scientific notation
    pd.options.display.float_format = '{:.6f}'.format

    # Step-level labels
    labeled_energy_data = label_energy_consumption(
        energy_data=energy_data,
        to_device=to_device,
        forward=forward,
        loss=loss,
        backward=backward,
        optimize=optimize,
    )

    # Layer-level variant (disabled): labels the forward pass per layer
    # labeled_energy_data_layer = label_energy_consumption_layer(energy_data, to_device, loss, backward, optimize, forward_layer_data)
    # print(labeled_energy_data_layer.head())
    # labeled_energy_data_layer.to_csv(os.path.join(folder_path, 'labeled_energy_data_layer.csv'), index=False)

    # Save the labeled samples into the run folder
    labeled_energy_data.to_csv(os.path.join(folder_path, 'labeled_energy_data.csv'), index=False)

/Users/dtjgp/Projects/GreenAI/4070/ModelsData/resnet18/E5_B128_R0_SR2_performance_100


FileNotFoundError: [Errno 2] No such file or directory: '/Users/dtjgp/Projects/GreenAI/4070/ModelsData/resnet18/E5_B128_R0_SR2_performance_100/energy_consumption_file.csv'