# This notebook gathers the energy consumption of the whole training process of different models.

## import the required libraries

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
import torchvision.transforms as transforms
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from pathlib import Path
import os
import time
import pynvml
import threading
import queue

## find the path

In [2]:
# Locate the working directory and the folder that holds the per-model data.
# (`Path` is already imported in the imports cell at the top of the notebook.)
current_path = Path.cwd()
print('The current path is:', current_path)

# All measurement data lives in the 'ModelsData' folder under the working directory.
data_path = current_path / 'ModelsData'
print('The data path is:', data_path)

The current path is: /root/autodl-tmp/GreenAI/3080
The data path is: /root/autodl-tmp/GreenAI/3080/ModelsData


## Models

### generate the data paths

In [3]:
# Names of the models to profile, in the order used for the per-model lists below.
models_name = ['resnet18']

In [4]:
# One data directory per model, in the same order as models_name.
DataList = [data_path / model_name for model_name in models_name]
print(DataList)

[PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/resnet18')]


### create the models

#### ResNet Models

##### ResNet18 Model

In [5]:
class Residual18(nn.Module):  #@save
    """ResNet-18 residual block that records the run time of each sub-layer.

    After every forward pass ``self.layer_time`` maps a sub-layer key
    ('conv1', 'bn1', 'relu1', 'conv2', 'bn2', optionally 'conv3', and
    'residual_add_relu2') to the elapsed time of that step in seconds.

    Args:
        input_channels: Number of channels of the incoming feature map.
        num_channels: Number of channels produced by the block.
        use_1x1conv: When True, the skip connection goes through a 1x1
            convolution (``conv3``) so that its shape matches the main path.
        strides: Stride of ``conv1`` (and of ``conv3`` when it exists).
    """

    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        # The assignment order below is deliberately unchanged: it determines
        # the registration order of sub-modules and parameters.
        self.conv1 = nn.Conv2d(input_channels, num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels, kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)
        self.relu1 = nn.ReLU()
        self.relu2 = nn.ReLU()

        # Dictionary to record time for each sub-layer
        self.layer_time = {}

    @staticmethod
    def _sync(tensor):
        """Wait for pending GPU work, but only when ``tensor`` lives on a CUDA device.

        An unconditional ``torch.cuda.synchronize()`` raises on machines
        without CUDA, which made the block unusable on CPU/MPS.
        """
        if tensor.is_cuda:
            torch.cuda.synchronize(tensor.device)

    def _timed(self, key, fn, *args):
        """Run ``fn(*args)``, store its duration under ``key`` and return the result."""
        # perf_counter is monotonic and high-resolution, which suits the
        # sub-millisecond durations measured here.
        start_t = time.perf_counter()
        result = fn(*args)
        self._sync(result)
        self.layer_time[key] = time.perf_counter() - start_t
        return result

    def forward(self, X):
        """Apply the residual block to ``X`` and refresh ``self.layer_time``."""
        out = self._timed('conv1', self.conv1, X)
        out = self._timed('bn1', self.bn1, out)
        out = self._timed('relu1', self.relu1, out)
        out = self._timed('conv2', self.conv2, out)
        out = self._timed('bn2', self.bn2, out)

        # Project the skip connection only when a 1x1 convolution was requested.
        if self.conv3 is not None:
            X = self._timed('conv3', self.conv3, X)

        # The residual addition and the final ReLU are timed as a single step.
        return self._timed('residual_add_relu2', lambda: self.relu2(out + X))
    
    
def resnet18(img_channel, num_labels):
    """Assemble the ResNet-18 network as a flat ``nn.Sequential``.

    The returned container holds, in order: the stem, four residual stages
    (two ``Residual18`` blocks each), adaptive average pooling, a flatten
    layer and the final linear classifier.

    Args:
        img_channel: Number of channels of the input images.
        num_labels: Number of output classes.

    Returns:
        The ``nn.Sequential`` network.
    """
    def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
        """Return the list of residual blocks that make up one stage.

        Except in the first stage, the leading block uses stride 2 together
        with a 1x1 convolution on the skip connection.
        """
        return [
            Residual18(input_channels, num_channels, use_1x1conv=True, strides=2)
            if idx == 0 and not first_block
            else Residual18(num_channels, num_channels)
            for idx in range(num_residuals)
        ]

    # Stem: 7x7 convolution, batch norm, ReLU and 3x3 max pooling.
    stem = nn.Sequential(
        nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )

    # (input channels, output channels, is first stage) for the four stages.
    stage_specs = (
        (64, 64, True),
        (64, 128, False),
        (128, 256, False),
        (256, 512, False),
    )
    stages = [
        nn.Sequential(*resnet_block(c_in, c_out, 2, first_block=is_first))
        for c_in, c_out, is_first in stage_specs
    ]

    return nn.Sequential(
        stem, *stages,
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(512, num_labels)
    )

## Call the Models

In [6]:
# Pick the best available backend: CUDA first, then Apple-silicon MPS, and
# finally the CPU. The original unconditionally fell back to 'mps', which
# yields an unusable device on machines that have neither CUDA nor MPS.
_mps_backend = getattr(torch.backends, 'mps', None)  # missing in old PyTorch builds
if torch.cuda.is_available():
    device = torch.device('cuda')
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print('The device is:', device)

# To debug MPS detection on a MacBook:
# print(torch.backends.mps.is_available())  # is MPS usable at run time?
# print(torch.backends.mps.is_built())      # was this PyTorch build compiled with MPS?

The device is: cuda


In [7]:
# Containers for the number of MACs and the number of parameters of each model.
# Every dataset gets its own pair of dictionaries so that models built with
# different input channels / label counts are never mixed up.
macs_f, paras_f = {}, {}
macs_c100, paras_c100 = {}, {}
macs_c10, paras_c10 = {}, {}

### use a function to call the models

In [8]:
# create a function for all the models to run
# Image channels: Fashion-MNIST uses 1, CIFAR-100 and CIFAR-10 use 3.
channel_f, channel_c = 1, 3

# Number of labels for Fashion-MNIST, CIFAR-100 and CIFAR-10 respectively.
num_labels_f, num_labels_c100, num_labels_c10 = 10, 100, 10

def get_model_info(model, img_channel, num_labels):
    """Print the name of a model factory and return the network it builds.

    Args:
        model: Callable with a ``__name__`` attribute that is invoked as
            ``model(img_channel, num_labels)``.
        img_channel: Number of input image channels forwarded to ``model``.
        num_labels: Number of output labels forwarded to ``model``.

    Returns:
        Whatever ``model(img_channel, num_labels)`` returns.
    """
    print('The model name is {}'.format(model.__name__))
    return model(img_channel, num_labels)

### import all models

In [9]:
# ResNet-18: build one independent network per dataset
# (Fashion-MNIST, CIFAR-100, CIFAR-10).
resnet18_f = get_model_info(model=resnet18, img_channel=channel_f, num_labels=num_labels_f)
resnet18_c100 = get_model_info(model=resnet18, img_channel=channel_c, num_labels=num_labels_c100)
resnet18_c10 = get_model_info(model=resnet18, img_channel=channel_c, num_labels=num_labels_c10)

The model name is resnet18
The model name is resnet18


The model name is resnet18


## Import the Datasets for training

In [10]:
# Show the model order; the per-dataset model lists are built in this order.
print(models_name)

['resnet18']


In [11]:
# create model list according to models_name order
# (one list per dataset: Fashion-MNIST, CIFAR-100 and CIFAR-10)
models_f_list = [resnet18_f]

models_c100_list = [resnet18_c100]

models_c10_list = [resnet18_c10]

### show the output size of each layer after the image is passed through the model

In [12]:
# Dummy Fashion-MNIST-shaped input (1 image, 1 channel, 224x224).
# NOTE(review): X_f is not used in this cell - confirm whether a later cell needs it.
X_f = torch.randn(size=(1, 1, 224, 224), dtype=torch.float32)

# Flatten the network into an ordered list of layer names. Sub-layers of a
# Residual18 block are listed as 'residual_<LayerClass>_<suffix>'.
layerlist_resnet18 = []
for layer in resnet18_f:
    name = type(layer).__name__
    if name != 'Sequential':
        layerlist_resnet18.append(name)
        continue
    for l in layer:
        inner_name = type(l).__name__
        if inner_name != 'Residual18':
            layerlist_resnet18.append(inner_name)
            continue
        # (sub-layer, suffix) pairs in the order in which they are listed
        residual_parts = [
            (l.conv1, '_1'), (l.bn1, '_1'), (l.relu1, '_1'),
            (l.conv2, '_2'), (l.bn2, '_2'),
        ]
        if l.conv3 is not None:  # only blocks with a 1x1 skip convolution have conv3
            residual_parts.append((l.conv3, '_3'))
        residual_parts.append((l.relu2, '_2'))
        layerlist_resnet18.extend(
            'residual_' + type(part).__name__ + suffix
            for part, suffix in residual_parts
        )

print(f'layerlist_resnet18: {layerlist_resnet18}')


layerlist_resnet18: ['Conv2d', 'BatchNorm2d', 'ReLU', 'MaxPool2d', 'residual_Conv2d_1', 'residual_BatchNorm2d_1', 'residual_ReLU_1', 'residual_Conv2d_2', 'residual_BatchNorm2d_2', 'residual_ReLU_2', 'residual_Conv2d_1', 'residual_BatchNorm2d_1', 'residual_ReLU_1', 'residual_Conv2d_2', 'residual_BatchNorm2d_2', 'residual_ReLU_2', 'residual_Conv2d_1', 'residual_BatchNorm2d_1', 'residual_ReLU_1', 'residual_Conv2d_2', 'residual_BatchNorm2d_2', 'residual_Conv2d_3', 'residual_ReLU_2', 'residual_Conv2d_1', 'residual_BatchNorm2d_1', 'residual_ReLU_1', 'residual_Conv2d_2', 'residual_BatchNorm2d_2', 'residual_ReLU_2', 'residual_Conv2d_1', 'residual_BatchNorm2d_1', 'residual_ReLU_1', 'residual_Conv2d_2', 'residual_BatchNorm2d_2', 'residual_Conv2d_3', 'residual_ReLU_2', 'residual_Conv2d_1', 'residual_BatchNorm2d_1', 'residual_ReLU_1', 'residual_Conv2d_2', 'residual_BatchNorm2d_2', 'residual_ReLU_2', 'residual_Conv2d_1', 'residual_BatchNorm2d_1', 'residual_ReLU_1', 'residual_Conv2d_2', 'residual_Ba

In [13]:
# Print the first convolution of every Residual18 block in the network.
# NOTE(review): `residual_block` and the suffixed `name` are assigned but never
# read in this cell - they look like leftovers from an unfinished experiment.
layer_count = 0
for layer in resnet18_f:
    name = type(layer).__name__
    if name != 'Sequential':
        # Top-level layers that are not containers only get a numbered name.
        name = f'{name}_{layer_count}'
        layer_count += 1
        continue
    residual_block = 0
    for l in layer:
        inner_name = type(l).__name__
        if inner_name == 'Residual18':
            print(l.conv1)

Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))


### load all the datasets:  
    1. FashionMNIST
    2. CIFAR100
    3. CIFAR10

In [14]:
# load the data
# fashion mnist
def get_dataloader_workers():
    """Return the number of worker processes used to read the data (4).

    Defined in :numref:`sec_utils`"""
    return 4

def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and wrap it in train/test data loaders.

    Defined in :numref:`sec_fashion_mnist`

    Args:
        batch_size: Batch size of both data loaders.
        resize: Optional target size; when truthy, images are resized before
            being converted to tensors.

    Returns:
        Tuple ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    # An optional resize step runs before the tensor conversion.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    mnist_train, mnist_test = (
        torchvision.datasets.FashionMNIST(
            root="../data", train=is_train, transform=pipeline, download=True)
        for is_train in (True, False)
    )

    workers = get_dataloader_workers()
    train_loader = torch.utils.data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=workers)
    test_loader = torch.utils.data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=workers)
    return (train_loader, test_loader)

def load_data_cifar100(batch_size, resize=None):
    """Download the CIFAR-100 dataset and wrap it in train/test data loaders.

    Args:
        batch_size: Batch size of both data loaders.
        resize: Optional target size; when truthy, images are resized before
            being converted to tensors.

    Returns:
        Tuple ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    # An optional resize step runs before the tensor conversion.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # Training split first, then the test split.
    cifar_train, cifar_test = (
        torchvision.datasets.CIFAR100(
            root="../data", train=is_train, transform=pipeline, download=True)
        for is_train in (True, False)
    )

    workers = get_dataloader_workers()
    train_loader = torch.utils.data.DataLoader(
        cifar_train, batch_size, shuffle=True, num_workers=workers)
    test_loader = torch.utils.data.DataLoader(
        cifar_test, batch_size, shuffle=False, num_workers=workers)
    return (train_loader, test_loader)
    
def load_data_cifar10(batch_size, resize=None):
    """Download the CIFAR-10 dataset and wrap it in train/test data loaders.

    Args:
        batch_size: Batch size of both data loaders.
        resize: Optional target size; when truthy, images are resized before
            being converted to tensors.

    Returns:
        Tuple ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    # An optional resize step runs before the tensor conversion.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # Training split first, then the test split.
    cifar_train, cifar_test = (
        torchvision.datasets.CIFAR10(
            root="../data", train=is_train, transform=pipeline, download=True)
        for is_train in (True, False)
    )

    workers = get_dataloader_workers()
    train_loader = torch.utils.data.DataLoader(
        cifar_train, batch_size, shuffle=True, num_workers=workers)
    test_loader = torch.utils.data.DataLoader(
        cifar_test, batch_size, shuffle=False, num_workers=workers)
    return (train_loader, test_loader)
    

### set the training parameters

In [15]:
# Experiment grid: every epoch count is combined with every batch size, and
# each combination is repeated `rounds` times by the run cell.
batch_size = [128]  # batch sizes to sweep over
epochs = [5]  # epoch counts to sweep over
rounds = 1  # number of repetitions per (epochs, batch size) combination
lr = 0.01  # learning rate passed on to the training function

## Train Function

#### using pynvml to get the GPU power consumption

In [16]:
def nvml_sampling_thread(handle, filename, stop_event, sampling_interval):
    """Periodically read the GPU power draw via NVML and append it to a CSV file.

    Meant to run in its own thread while training is in progress. Each row
    holds ``time.time()`` and the power in watts (NVML reports milliwatts).

    Args:
        handle: NVML device handle, e.g. from ``pynvml.nvmlDeviceGetHandleByIndex(0)``.
        filename: Directory (a ``pathlib.Path``) in which
            'energy_consumption_file.csv' is created or appended to.
        stop_event: ``threading.Event``; sampling ends once it is set.
        sampling_interval: Pause between two samples, in seconds.
    """
    csv_path = filename/'energy_consumption_file.csv'
    # The file is opened in append mode, so the header must only be written
    # when the file is new or empty; otherwise every re-run into the same
    # folder would insert another header row in the middle of the data.
    needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

    with open(csv_path, 'a') as f:  # append mode
        if needs_header:
            f.write("timestamp,power_in_watts\n")
        while not stop_event.is_set():
            try:
                # Take the timestamp right before the power reading.
                current_time = time.time()
                current_power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0  # mW -> W
            except pynvml.NVMLError as e:
                print(f"NVML Error: {e}")
                break
            f.write(f"{current_time},{current_power}\n")
            # Event.wait acts as an interruptible sleep: it returns True as
            # soon as the event is set, so shutdown is not delayed by a full
            # sampling interval.
            if stop_event.wait(sampling_interval):
                break

#### integrate the sampled power over a time interval

In [17]:
def integrate_power_over_interval(samples, start_time, end_time):
    """Integrate sampled power over ``[start_time, end_time]`` (trapezoid rule).

    The power at both interval boundaries is obtained by linear interpolation
    between the neighbouring samples, so the whole interval is covered no
    matter how many samples fall inside it. Previously the boundary points
    were only added when fewer than two samples lay inside the interval; with
    two or more, the spans before the first and after the last inner sample
    were silently dropped and the energy was under-estimated.

    Args:
        samples: Sequence of ``(timestamp, power)`` pairs sorted by ascending
            timestamp (sort it first if necessary).
        start_time: Start of the interval, in the time base of ``samples``.
        end_time: End of the interval, in the time base of ``samples``.

    Returns:
        The integral of power over time as a float (joules when the samples
        are in watts and seconds); ``0.0`` when ``samples`` is empty.
    """
    import bisect  # function-scope import, as in the original implementation

    if len(samples) == 0:
        return 0.0

    # Timestamps only, used for the binary search inside interpolate().
    times = [t for t, _ in samples]

    def interpolate(target_time):
        """Return the linearly interpolated power at ``target_time``."""
        # Outside the sampled range there is no second point to interpolate
        # with, so the nearest boundary sample is used instead.
        if target_time <= samples[0][0]:
            return samples[0][1]
        if target_time >= samples[-1][0]:
            return samples[-1][1]

        # Here samples[pos-1] lies strictly before target_time and samples[pos]
        # at or after it, so t2 - t1 is always positive.
        pos = bisect.bisect_left(times, target_time)
        t1, p1 = samples[pos-1]
        t2, p2 = samples[pos]
        ratio = (target_time - t1) / (t2 - t1)
        return p1 + (p2 - p1)*ratio

    # Samples inside the interval, framed by the two interpolated boundary
    # points. A sample exactly on a boundary only adds a zero-width segment.
    inner = [(t, p) for t, p in samples if start_time <= t <= end_time]
    points = [(start_time, interpolate(start_time))] + inner + [(end_time, interpolate(end_time))]
    points.sort(key=lambda point: point[0])

    total_energy = 0.0
    for (t1, p1), (t2, p2) in zip(points, points[1:]):
        total_energy += (p1 + p2)/2.0 * (t2 - t1)

    return total_energy

#### set the training process

In [18]:
def _forward_with_layer_timing(net, X, batch_layer_time):
    """Run ``X`` through ``net`` block by block and record per-layer timings.

    ``batch_layer_time`` is filled in place. Top-level blocks are keyed
    '<ClassName>_<block_count>'; ``block_count`` only advances after a
    'Sequential' block. Inside a Sequential block, a Residual18 sub-block
    stores its own ``layer_time`` dict under 'Residual18_<position>', while
    any other sub-layer stores a ``(start, end)`` tuple of ``time.time()``
    stamps under its class name. Non-Sequential top-level blocks store a
    ``(start, end)`` tuple directly.

    Returns:
        The output of the last block.
    """
    y_hat = X
    block_count = 0
    for block in net:
        name = block.__class__.__name__  # get the name of the layer
        name_show = name + '_' + str(block_count)
        print(f'The name of the layer is: {name_show}')
        batch_layer_time[name_show] = {}
        if name == 'Sequential':
            # iterate the subblocks in the block
            for resblock_num, subblock in enumerate(block):
                subblock_name = subblock.__class__.__name__
                if subblock_name == 'Residual18':
                    # start from a fresh dict so earlier batches keep their own
                    subblock.layer_time = {}
                    y_hat = subblock(y_hat)
                    torch.cuda.synchronize()
                    print(subblock.layer_time)
                    subblock_name = subblock_name + '_' + str(resblock_num)
                    batch_layer_time[name_show][subblock_name] = subblock.layer_time
                else:
                    # print the current layer name
                    print(subblock_name)
                    start_layer_time = time.time()
                    y_hat = subblock(y_hat)
                    torch.cuda.synchronize()
                    end_layer_time = time.time()
                    batch_layer_time[name_show][subblock_name] = (start_layer_time, end_layer_time)
            block_count += 1
        else:
            start_layer_time = time.time()
            y_hat = block(y_hat)
            torch.cuda.synchronize()
            end_layer_time = time.time()
            batch_layer_time[name_show] = (start_layer_time, end_layer_time)
    return y_hat


def train_func(net, train_iter, test_iter, num_epochs, lr, device, filename, sampling_interval):
    """Train ``net`` while logging phase timestamps and sampling GPU power.

    A background thread (``nvml_sampling_thread``) records the power draw of
    GPU 0 for the whole run. Every training phase is bracketed by
    ``torch.cuda.synchronize()`` and ``time.time()`` so that the power samples
    can later be matched to it. The function therefore needs a CUDA device
    and a working NVML installation.

    Note that the progress prints of the forward pass happen inside the timed
    forward interval.

    Args:
        net: Iterable ``nn.Sequential``-style network (iterated block by block).
        train_iter: Training data loader yielding ``(X, y)`` batches.
        test_iter: Test data loader, evaluated after each epoch.
        num_epochs: Number of training epochs.
        lr: Learning rate of the SGD optimizer.
        device: Device the network and the batches are moved to.
        filename: Directory handed to the sampling thread for its CSV file.
        sampling_interval: Power sampling period in seconds.

    Returns:
        Tuple ``(to_device, forward, loss, backward, optimize, test, epoch,
        layer_time)``. The first five are lists (one entry per epoch) of lists
        (one entry per batch) of ``(start, end)`` timestamps; ``test`` and
        ``epoch`` hold a one-element list per epoch; ``layer_time`` is the
        nested dict ``{epoch: {batch: {layer: timing}}}`` with string keys.
    """
    def init_weights(m):
        # exact type match on purpose: subclasses are left untouched
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    # save all epochs time data using list
    to_device_intervals_total = []
    forward_intervals_total = []
    loss_intervals_total = []
    backward_intervals_total = []
    optimize_intervals_total = []
    test_intervals_total = []

    # create a dictionary to store each layer time period data in each batch
    layer_time = {}

    # create a list to store the epoch time data
    epoch_intervals_total = []

    # Initialise NVML and start the sampling thread. The try/finally blocks
    # guarantee that the thread is stopped and NVML is shut down even when
    # training fails or is interrupted; otherwise the sampler would keep
    # writing to its file for the rest of the kernel session.
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        stop_event = threading.Event()
        sampler_thread = threading.Thread(target=nvml_sampling_thread, args=(handle, filename, stop_event, sampling_interval))
        sampler_thread.start()
        try:
            for epoch in range(num_epochs):
                layer_time[str(epoch)] = {}

                print('The epoch is:', epoch+1)
                metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples
                to_device_intervals_epoch = []  # to_device interval of every batch in this epoch
                forward_intervals_epoch = []  # forward interval of every batch in this epoch
                loss_intervals_epoch = []  # loss interval of every batch in this epoch
                backward_intervals_epoch = []
                optimize_intervals_epoch = []
                test_intervals_epoch = []
                epoch_intervals_epoch = []  # interval of this epoch

                epoch_start_time = time.time()

                net.train()
                for i, (X, y) in enumerate(train_iter):
                    print('The batch is:', i+1)

                    layer_time[str(epoch)][str(i)] = {}
                    optimizer.zero_grad()
                    torch.cuda.synchronize()

                    # timestamps before and after moving the batch to the device
                    start_ttd_time = time.time()
                    X, y = X.to(device), y.to(device)
                    torch.cuda.synchronize()
                    end_ttd_time = time.time()
                    to_device_intervals_epoch.append((start_ttd_time, end_ttd_time))

                    # forward, layer by layer so that each layer can be timed
                    start_forward_time = time.time()
                    y_hat = _forward_with_layer_timing(net, X, layer_time[str(epoch)][str(i)])
                    end_forward_time = time.time()
                    forward_intervals_epoch.append((start_forward_time, end_forward_time))

                    # loss
                    start_loss_time = time.time()
                    l = loss_fn(y_hat, y)
                    torch.cuda.synchronize()
                    end_loss_time = time.time()
                    loss_intervals_epoch.append((start_loss_time, end_loss_time))

                    # backward
                    start_backward_time = time.time()
                    l.backward()
                    torch.cuda.synchronize()
                    end_backward_time = time.time()
                    backward_intervals_epoch.append((start_backward_time, end_backward_time))

                    # optimize
                    start_optimize_time = time.time()
                    optimizer.step()
                    torch.cuda.synchronize()
                    end_optimize_time = time.time()
                    optimize_intervals_epoch.append((start_optimize_time, end_optimize_time))

                    with torch.no_grad():
                        metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
                    train_acc = metric[1] / metric[2]

                start_test_time = time.time()
                test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
                end_test_time = time.time()
                print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
                test_intervals_epoch.append((start_test_time, end_test_time))

                epoch_end_time = time.time()
                epoch_intervals_epoch.append((epoch_start_time, epoch_end_time))

                # data need to be saved
                # add the intervals_epoch to intervals_total
                to_device_intervals_total.append(to_device_intervals_epoch)
                forward_intervals_total.append(forward_intervals_epoch)
                loss_intervals_total.append(loss_intervals_epoch)
                backward_intervals_total.append(backward_intervals_epoch)
                optimize_intervals_total.append(optimize_intervals_epoch)
                test_intervals_total.append(test_intervals_epoch)
                epoch_intervals_total.append(epoch_intervals_epoch)
        finally:
            # stop the sampling thread once training has ended (or failed)
            stop_event.set()
            sampler_thread.join()
    finally:
        pynvml.nvmlShutdown()

    return to_device_intervals_total, forward_intervals_total, loss_intervals_total, backward_intervals_total, optimize_intervals_total, test_intervals_total, epoch_intervals_total, layer_time

## Start to train the model

### define a function to train the model on the FashionMNIST dataset

In [19]:
def train_model(main_folder, batch_size, num_epochs, round, lr, device, sample_interval, net):
    """Train ``net`` on Fashion-MNIST and save all timing data to disk.

    The results are written to
    ``main_folder/E<epochs>_B<batch>_R<round>_SR<interval in ms>_layer/fashion_mnist``:
    one ``.npy`` file per training phase plus 'layer_time.csv'. The power log
    is written into the same folder by the sampling thread of ``train_func``.

    Args:
        main_folder: Root output directory (``pathlib.Path``).
        batch_size: Batch size of the data loaders.
        num_epochs: Number of training epochs.
        round: Zero-based index of the repetition (used in the folder name).
            The name shadows the builtin inside this function but is kept for
            backward compatibility with existing callers.
        lr: Learning rate.
        device: Device to train on.
        sample_interval: Power sampling period in seconds.
        net: The network to train.
    """
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # sampling period in whole milliseconds, used in the folder name
    sr_number = int(sample_interval*1000)
    epoch_batch_folder = f'E{num_epochs}_B{batch_size}_R{round}_SR{sr_number}_layer'

    data_dir = 'fashion_mnist'

    # the folder path is main_folder/epoch_batch_folder/data_dir
    folder_path = main_folder/epoch_batch_folder/data_dir
    print(f'The folder path is: {folder_path}')
    os.makedirs(folder_path, exist_ok=True)

    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
    # Show the shape of the first three batches only. The batch count comes
    # from len(train_iter), so the whole (resized) training set no longer has
    # to be loaded once just to be counted.
    for i, (X, y) in enumerate(train_iter):
        if i >= 3:
            break
        print('the shape of the', i, 'batch of the train_iter is:', X.shape)
    print(f'The number of batches is: {(len(train_iter),)}')

    to_device_intervals_total, forward_intervals_total, loss_intervals_total,\
          backward_intervals_total, optimize_intervals_total, test_intervals_total, epoch_intervals_total, layer_time = train_func(net, train_iter, test_iter, num_epochs, lr, device, folder_path, sample_interval)

    # transfer the data to the numpy array
    to_device_data = np.array(to_device_intervals_total)
    forward_time = np.array(forward_intervals_total)
    loss_time = np.array(loss_intervals_total)
    backward_time = np.array(backward_intervals_total)
    optimize_time = np.array(optimize_intervals_total)
    test_time = np.array(test_intervals_total)
    epoch_time = np.array(epoch_intervals_total)

    # The per-layer timings form a nested dict {epoch: {batch: {layer: timing}}}.
    # from_dict turns the epochs into columns and the batches into rows; each
    # cell holds the per-layer dict of that batch.
    # NOTE(review): to_csv(index=False) drops the batch index, so a row is only
    # identified by its position - confirm that downstream readers expect this.
    layer_time_df = pd.DataFrame.from_dict(layer_time)

    # save the data
    np.save(folder_path/'to_device.npy', to_device_data, allow_pickle=True)
    np.save(folder_path/'forward.npy', forward_time, allow_pickle=True)
    np.save(folder_path/'loss.npy', loss_time, allow_pickle=True)
    np.save(folder_path/'backward.npy', backward_time, allow_pickle=True)
    np.save(folder_path/'optimize.npy', optimize_time, allow_pickle=True)
    np.save(folder_path/'test.npy', test_time, allow_pickle=True)
    np.save(folder_path/'epoch.npy', epoch_time, allow_pickle=True)
    layer_time_df.to_csv(folder_path/'layer_time.csv', index=False)


### run the model

In [20]:
# Power sampling period in seconds (0.002 s = 2 ms).
sampling_interval = 0.002
# Folder that stores the data of the first model in models_name.
main_folder = DataList[0]

print('The folder is:', main_folder)
# Make sure the output folder exists before training starts.
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)

# Sweep over every (epochs, batch size) combination, repeating each one
# `rounds` times. The loop variable is deliberately not called `round`: at
# module level that would overwrite the builtin round() for all later cells.
for epoch in epochs:
    for batch in batch_size:
        for round_idx in range(rounds):
            train_model(main_folder, batch, epoch, round_idx, lr, device, sampling_interval, resnet18_f)

The folder is: /root/autodl-tmp/GreenAI/3080/ModelsData/resnet18
文件存在。
The epoch is set: 5, batch is set: 128, is in 1th running
The folder path is: /root/autodl-tmp/GreenAI/3080/ModelsData/resnet18/E5_B128_R0_SR2_layer/fashion_mnist


the shape of the 0 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([128, 1, 224, 224])
The number of batches is: (469,)
training on cuda
The epoch is: 1
The batch is: 1
The name of the layer is: Sequential_0
Conv2d
BatchNorm2d
ReLU
MaxPool2d
The name of the layer is: Sequential_1
{'conv1': 0.016913652420043945, 'bn1': 0.0008652210235595703, 'relu1': 0.0006337165832519531, 'conv2': 0.0020477771759033203, 'bn2': 0.0006935596466064453, 'residual_add_relu2': 0.0010988712310791016}
{'conv1': 0.0031249523162841797, 'bn1': 0.0006582736968994141, 'relu1': 0.0006229877471923828, 'conv2': 0.0020554065704345703, 'bn2': 0.0007216930389404297, 'residual_add_relu2': 0.0009915828704833984}
The name of the layer is: Sequential_2
{'conv1': 0.005850076675415039, 'bn1': 0.00047135353088378906, 'relu1': 0.000202178955078125, 'conv2': 0.0053288936614990234, 'bn2