# This code is to get the labeled_energy_data csv file of each model

## Import the lib

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import seaborn as sns

## load function

### interpolate function

In [3]:
def integrate_power_over_interval(samples, start_time, end_time):
    # 假定 samples是按时间升序排序的 (t, p)
    # 若未排序，请先排序:
    # samples = sorted(samples, key=lambda x: x[0])
    
    def interpolate(samples, target_time):
        # 在 samples 中找到 target_time 左右最近的两个点，并进行线性插值
        # 若 target_time 恰好等于某个样本点时间，直接返回该点功率
        # 若无法找到两侧点（如 target_time在样本时间轴外），根据情况返回None或边界点
        n = len(samples)
        if n == 0:
            return None
        # 若 target_time 小于第一个样本点时间，无法向左插值，这里直接返回第一个点的功率值(或None)
        if target_time <= samples[0][0]:
            # 简化处理：返回最早样本点的功率（或None）
            return samples[0][1]
        # 若 target_time 大于最后一个样本点时间，无法向右插值，返回最后一个点的功率（或None）
        if target_time >= samples[-1][0]:
            return samples[-1][1]

        # 否则，在中间插值
        # 使用二分查找快速定位
        import bisect
        times = [t for t, _ in samples]
        pos = bisect.bisect_left(times, target_time)
        # pos是使times保持有序插入target_time的位置
        # 因为target_time不在已有样本点中，pos不会越界且pos>0且pos<n
        t1, p1 = samples[pos-1]
        t2, p2 = samples[pos]
        # 线性插值： p = p1 + (p2 - p1)*((target_time - t1)/(t2 - t1))
        ratio = (target_time - t1) / (t2 - t1)
        p = p1 + (p2 - p1)*ratio
        return p

    # 从原始 samples 中筛选出位于[start_time, end_time]内的点
    filtered = [(t, p) for t, p in samples if start_time <= t <= end_time]

    # 如果不足2个点，则尝试使用插值
    if len(filtered) < 2:
        # 无论如何都需要在边界处插值出两个点(起码start和end)
        start_power = interpolate(samples, start_time)
        end_power = interpolate(samples, end_time)

        # 如果从样本中无法插值出任何有意义的点（比如samples为空或无法插值），返回0.0
        if start_power is None or end_power is None:
            return 0.0

        # 将插值的边界点加入到 filtered
        # 注意：如果filtered中有一个点在区间内，我们也需要确保边界有两点以上
        # 例如filtered只有一个点在中间，则需要在start和end插值点全部加入。
        # 若filtered为空，则只用start/end两点插值点求积分
        new_filtered = [(start_time, start_power)] + filtered + [(end_time, end_power)]
        # 确保按时间排序
        new_filtered.sort(key=lambda x: x[0])
        filtered = new_filtered

    # 正常积分计算
    if len(filtered) < 2:
        # 经过插值仍不够，返回0
        return 0.0

    total_energy = 0.0
    for i in range(len(filtered)-1):
        t1, p1 = filtered[i]
        t2, p2 = filtered[i+1]
        dt = t2 - t1
        avg_p = (p1 + p2)/2.0
        total_energy += avg_p * dt

    return total_energy

### label energy function

In [4]:
def label_energy_consumption(energy_data, to_device, forward, loss, backward, optimize):
    # Create a copy of the energy_data dataframe to avoid modifying the original
    labeled_energy_data = energy_data.copy()
    
    # Initialize a new column for the step labels
    labeled_energy_data['step'] = 'idle'
    
    # Define a helper function to label the steps
    def label_steps(energy_data, step_energy, step_name):
        for epoch in range(step_energy.shape[0]):
            for batch in range(step_energy.shape[1]):
                start_time = step_energy[epoch][batch][0]
                end_time = step_energy[epoch][batch][1]
                mask = (energy_data['timestamp'] >= start_time) & (energy_data['timestamp'] <= end_time)
                labeled_energy_data.loc[mask, 'step'] = step_name
    
    # Label each step
    label_steps(labeled_energy_data, to_device, 'to_device')
    label_steps(labeled_energy_data, forward, 'forward')
    label_steps(labeled_energy_data, loss, 'loss')
    label_steps(labeled_energy_data, backward, 'backward')
    label_steps(labeled_energy_data, optimize, 'optimize')
    
    return labeled_energy_data

### load the label function

## find the data path

In [5]:
current_path = os.getcwd()
data_path = os.path.join(current_path, 'ModelsData')
data_folders = os.listdir(data_path)
print(data_folders)

['googlenet_origin', 'resnet50', 'resnet34', 'vgg13', 'googlenet_mod7', 'googlenet_mod9', 'googlenet_mod8', 'googlenet_mod1', 'googlenet_mod6', 'mobilenetv1_path', 'resnet18', 'mobilenetv2_path', 'vgg16', 'vgg11', 'googlenet_mod3', 'googlenet_mod4', 'googlenet_mod5', 'googlenet_mod2', 'alexnet']


## load all the model data

In [6]:
# set the epoch number, batch size and the round number as well as the sampling rate
epoch = 5
batch_size = 128
round_num = 0
sampling_rate = 2

In [7]:
# for all the folder names in the data folder, generate the path to the folder
# and print the path
folder_name = f'E{epoch}_B{batch_size}_R{round_num}_SR{sampling_rate}'

data_dir = 'fashion_mnist'
# data_dir = 'cifar100'
# data_dir = 'cifar10'



print(folder_name)
for folder in data_folders:
    folder_path = os.path.join(data_path, folder)
    folder_path = os.path.join(folder_path, folder_name)
    folder_path = os.path.join(folder_path, data_dir)
    print(folder_path)

    # load the csv files 
    energy_data = pd.read_csv(os.path.join(folder_path, 'energy_consumption_file.csv'))

    # load the npy files
    to_device = np.load(os.path.join(folder_path, 'to_device.npy'), allow_pickle=True)
    forward = np.load(os.path.join(folder_path, 'forward.npy'), allow_pickle=True)
    loss = np.load(os.path.join(folder_path, 'loss.npy'), allow_pickle=True)
    backward = np.load(os.path.join(folder_path, 'backward.npy'), allow_pickle=True)
    optimize = np.load(os.path.join(folder_path, 'optimize.npy'), allow_pickle=True)

    # Set the display format for floating-point numbers to avoid scientific notation
    pd.options.display.float_format = '{:.6f}'.format

    # Use the function to label the energy consumption data
    labeled_energy_data = label_energy_consumption(energy_data, to_device, forward, loss, backward, optimize)
    print(labeled_energy_data.head())

    # save the file to the folder
    labeled_energy_data.to_csv(os.path.join(folder_path, 'labeled_energy_data.csv'), index=False)

E5_B128_R0_SR2
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/googlenet_origin/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/resnet50/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/resnet34/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/vgg13/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/googlenet_mod7/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/googlenet_mod9/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/googlenet_mod8/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/googlenet_mod1/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/googlenet_mod6/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/mobilenetv1_path/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Projects/GreenAI/3080/ModelsData/resnet18/E5_B128_R0_SR2/fashion_mnist
/Users/dtjgp/Proje