# Gather the energy consumption of the whole training process for different models.

## import the required libraries

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
import torchvision.transforms as transforms
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from pathlib import Path
import os
import time
import pynvml
import threading
import queue# type: ignore
from torch.cuda.amp import GradScaler

os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

## find the path

In [2]:
# Locate the notebook's working directory and the folder that stores model data.
# `Path` comes from the imports cell at the top of the notebook.
current_path = Path.cwd()
print('The current path is:', current_path)

# Per-model results live under <working directory>/ModelsData.
data_path = current_path / 'ModelsData'
print('The data path is:', data_path)

The current path is: /root/autodl-tmp/GreenAI/3080
The data path is: /root/autodl-tmp/GreenAI/3080/ModelsData


## Models

### generate the data paths

In [3]:
# Names of the models to benchmark; each name is also used as a sub-folder of data_path.
models_name = ['vgg11']

In [4]:
# One output directory per model, in the same order as models_name.
DataList = [Path(f"{data_path}/{model_name}") for model_name in models_name]
print(DataList)

[PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/alexnet'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/vgg11'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/vgg13'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/vgg16'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/resnet18'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/resnet34'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/resnet50'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_origin'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod1'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod2'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod3'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod4'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod5'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod6'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod7'),

### create the models

##### VGG11 Model

In [6]:
def vgg11_block(num_convs, in_channels, out_channels):
    """Build one VGG stage: ``num_convs`` x (3x3 conv + ReLU), then a 2x2 max-pool.

    Args:
        num_convs: Number of convolution + ReLU pairs in the stage.
        in_channels: Channels of the tensor entering the stage.
        out_channels: Channels produced by every convolution in the stage.

    Returns:
        An ``nn.Sequential`` holding the convolutions, activations and the
        final ``MaxPool2d(kernel_size=2, stride=2)``.
    """
    # Channel width before each conv, plus the final width: only the first
    # convolution changes the channel count.
    widths = [in_channels] + [out_channels] * num_convs
    modules = []
    for c_in, c_out in zip(widths[:-1], widths[1:]):
        modules += [nn.Conv2d(c_in, c_out, kernel_size=3, padding=1), nn.ReLU()]
    modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*modules)

def vgg11(input_channels, output_channels):
    """Assemble a VGG-11 network as a single ``nn.Sequential``.

    Args:
        input_channels: Number of channels of the input images.
        output_channels: Size of the final linear layer (number of classes).

    Returns:
        ``nn.Sequential`` of five ``vgg11_block`` stages followed by a
        flatten and a three-layer fully connected classifier.
    """
    # (convolutions per stage, stage output channels) for the five stages.
    stage_specs = ((1, 64), (1, 128), (2, 256), (2, 512), (2, 512))

    stages = []
    prev_channels = input_channels
    for n_convs, width in stage_specs:
        stages.append(vgg11_block(n_convs, prev_channels, width))
        prev_channels = width

    # The 7 * 7 factor hard-codes the spatial size left after the five
    # stride-2 pools, i.e. it matches 224x224 inputs (224 / 2**5 = 7).
    classifier = [
        nn.Flatten(),
        nn.Linear(prev_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, output_channels),  # one logit per class
    ]
    return nn.Sequential(*stages, *classifier)

## Call the Models

### use a function to call the models

In [24]:
# Dataset-dependent settings passed to the model builders.
# Image channels: Fashion-MNIST first, then CIFAR-10 / CIFAR-100.
channel_f, channel_c = 1, 3

# Number of labels: Fashion-MNIST, CIFAR-100, CIFAR-10.
num_labels_f, num_labels_c100, num_labels_c10 = 10, 100, 10

def get_model_info(model, img_channel, num_labels):
    """Instantiate a model and report its complexity with ptflops.

    Args:
        model: Callable ``model(img_channel, num_labels)`` returning the network.
        img_channel: Number of input image channels.
        num_labels: Number of output classes.

    Returns:
        ``(net, macs, params)`` where ``macs`` and ``params`` are the strings
        returned by ``get_model_complexity_info`` (``as_strings=True``) for a
        ``(img_channel, 224, 224)`` input.
    """
    from contextlib import nullcontext

    net = model(img_channel, num_labels)
    # Selecting GPU 0 raises on machines without CUDA, so only do it when a
    # GPU is available; the notebook falls back to the CPU elsewhere.
    device_ctx = torch.cuda.device(0) if torch.cuda.is_available() else nullcontext()
    with device_ctx:
        macs, params = get_model_complexity_info(net, (img_channel, 224, 224), as_strings=True,
                                                print_per_layer_stat=True, verbose=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    return net, macs, params
    

### VGG

#### VGG11

In [29]:
# Build one VGG-11 per dataset variant; get_model_info also prints the
# ptflops complexity report for each of them.
# Fashion-MNIST: channel_f input channels, num_labels_f classes.
vgg11_f, macs_vgg11_f, paras_vgg11_f = get_model_info(vgg11, channel_f, num_labels_f)
print('-'*50)
# CIFAR-100: channel_c input channels, num_labels_c100 classes.
vgg11_c100, macs_vgg11_c100, paras_vgg11_c100 = get_model_info(vgg11, channel_c, num_labels_c100)
print('-'*50)
# CIFAR-10: channel_c input channels, num_labels_c10 classes.
vgg11_c10, macs_vgg11_c10, paras_vgg11_c10 = get_model_info(vgg11, channel_c, num_labels_c10)

Sequential(
  128.81 M, 100.000% Params, 7.57 GMac, 99.821% MACs, 
  (0): Sequential(
    640, 0.000% Params, 38.54 MMac, 0.508% MACs, 
    (0): Conv2d(640, 0.000% Params, 32.11 MMac, 0.424% MACs, 1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 3.21 MMac, 0.042% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 3.21 MMac, 0.042% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    73.86 k, 0.057% Params, 929.66 MMac, 12.262% MACs, 
    (0): Conv2d(73.86 k, 0.057% Params, 926.45 MMac, 12.219% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 1.61 MMac, 0.021% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 1.61 MMac, 0.021% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (2): Sequential(
    885.25 k, 0.687% Params, 2.78 GMac, 36.648% MACs, 
    (0): Conv2d(295.17 k, 0.229% Params, 925.65 MMac, 12.209% MACs, 128, 256, kernel_size=(3,

## Import the Datasets for training

### show the output size of each layer after an image is passed through the model

In [44]:
# Display the configured model names.
print(models_name)

['alexnet', 'vgg11', 'vgg13', 'vgg16', 'resnet18', 'resnet34', 'resnet50', 'googlenet_origin', 'googlenet_mod1', 'googlenet_mod2', 'googlenet_mod3', 'googlenet_mod4', 'googlenet_mod5', 'googlenet_mod6', 'googlenet_mod7', 'googlenet_mod8', 'googlenet_mod9', 'mobilenetv1_path', 'mobilenetv2_path']


In [45]:
# One list of networks per dataset variant. Entries must follow the order of
# models_name, because the run cell pairs models_f_list[i] with DataList[i].
models_f_list = [vgg11_f]

models_c100_list = [vgg11_c100]

models_c10_list = [vgg11_c10]

In [46]:
# X_f = torch.randn(size=(1, 1, 224, 224), dtype=torch.float32) # fashion mnist

# for model in models_f_list:
#     print(model)
#     for layer in model:
#         X_f=layer(X_f)
#         print(layer.__class__.__name__,'output shape:\t',X_f.shape)
    

### load all the datasets:  
    1. FashionMNIST
    2. CIFAR100
    3. CIFAR10

In [47]:
# load the data
# fashion mnist
def get_dataloader_workers():
    """Return the number of worker processes every DataLoader uses (4).

    Defined in :numref:`sec_utils`"""
    return 4

def _load_image_dataset(dataset_cls, batch_size, resize=None):
    """Build ``(train_loader, test_loader)`` for a torchvision dataset class.

    Shared implementation of the three public loaders below.

    Args:
        dataset_cls: Dataset class accepting ``root``, ``train``,
            ``transform`` and ``download`` keyword arguments.
        batch_size: Batch size for both loaders.
        resize: Optional target size; when truthy a ``Resize`` step is
            applied before ``ToTensor``.

    Returns:
        Tuple of two ``DataLoader`` objects: shuffled training data and
        unshuffled test data. The data is downloaded to ``../data`` if needed.
    """
    steps = [transforms.ToTensor()]
    if resize:
        # Resize must run before the image is converted to a tensor.
        steps.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(steps)
    train_set = dataset_cls(root="../data", train=True, transform=trans, download=True)
    test_set = dataset_cls(root="../data", train=False, transform=trans, download=True)
    workers = get_dataloader_workers()
    return (torch.utils.data.DataLoader(train_set, batch_size, shuffle=True,
                                        num_workers=workers),
            torch.utils.data.DataLoader(test_set, batch_size, shuffle=False,
                                        num_workers=workers))

def load_data_fashion_mnist(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and return (train, test) DataLoaders.

    Defined in :numref:`sec_fashion_mnist`"""
    return _load_image_dataset(torchvision.datasets.FashionMNIST, batch_size, resize)

def load_data_cifar100(batch_size, resize=None):
    """Download the CIFAR-100 dataset and return (train, test) DataLoaders."""
    return _load_image_dataset(torchvision.datasets.CIFAR100, batch_size, resize)

def load_data_cifar10(batch_size, resize=None):
    """Download the CIFAR-10 dataset and return (train, test) DataLoaders."""
    return _load_image_dataset(torchvision.datasets.CIFAR10, batch_size, resize)
    

### set the training parameters

In [48]:
# Hyper-parameter grid: the run cell trains once for every combination of
# `epochs` and `batch_size`, and repeats each combination `rounds` times.
batch_size = [128]
epochs = [5]
rounds = 1

## Train Function

#### set the learning rate

In [49]:
# Learning rate handed to the training function.
lr = 0.01
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('The device is:', device)

The device is: cuda


#### using pynvml to get the GPU power consumption

In [50]:
def nvml_sampling_thread(handle, filename, stop_event, sampling_interval):
    """Periodically sample GPU power through NVML and append it to a CSV file.

    Meant to run in its own thread. Rows are appended to
    ``<filename>/energy_consumption_file.csv`` as ``timestamp,power_in_watts``.

    Args:
        handle: NVML device handle, e.g. from ``nvmlDeviceGetHandleByIndex(0)``.
        filename: Directory that receives ``energy_consumption_file.csv``.
        stop_event: ``threading.Event``; sampling ends once it is set.
        sampling_interval: Pause between two samples, in seconds.
    """
    csv_path = Path(filename) / 'energy_consumption_file.csv'
    # The file is opened in append mode, so the header is only written when
    # the file is new or empty; otherwise re-runs would insert header rows
    # in the middle of the data.
    needs_header = not csv_path.exists() or csv_path.stat().st_size == 0
    with open(csv_path, 'a') as f:
        if needs_header:
            f.write("timestamp,power_in_watts\n")
        while not stop_event.is_set():
            try:
                # Take one sample: wall-clock timestamp and current power draw.
                current_time = time.time()
                current_power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0  # mW -> W
                f.write(f"{current_time},{current_power}\n")
                # Wait for the next sample, but wake up at once when asked to stop.
                stop_event.wait(sampling_interval)
            except pynvml.NVMLError as e:
                print(f"NVML Error: {e}")
                break

#### integrate the sampled power over a time interval

In [51]:
def integrate_power_over_interval(samples, start_time, end_time):
    """Integrate sampled power over ``[start_time, end_time]`` (trapezoid rule).

    The power at both interval boundaries is obtained by linear interpolation
    between the neighbouring samples, so the whole interval is always covered,
    not only the span between the first and last sample inside it.

    Args:
        samples: Sequence of ``(timestamp, power)`` pairs sorted by ascending
            timestamp (sort beforehand if necessary).
        start_time: Start of the integration window.
        end_time: End of the integration window. If it is smaller than
            ``start_time`` the two bounds are swapped.

    Returns:
        The integral of power over time as a float (joules when power is in
        watts and timestamps are in seconds). ``0.0`` when ``samples`` is empty.
        Outside the sampled time range the nearest sample's power is used.
    """
    import bisect

    if len(samples) == 0:
        return 0.0
    if end_time < start_time:
        start_time, end_time = end_time, start_time

    # Built once and reused for both boundary look-ups and the window slice.
    times = [t for t, _ in samples]

    def interpolate(target_time):
        # Before the first / after the last sample there is nothing to
        # interpolate with, so the nearest sample's power is used.
        if target_time <= times[0]:
            return samples[0][1]
        if target_time >= times[-1]:
            return samples[-1][1]
        # bisect_left yields times[pos-1] < target_time <= times[pos],
        # hence 0 < pos < len(samples) and t2 - t1 > 0.
        pos = bisect.bisect_left(times, target_time)
        t1, p1 = samples[pos - 1]
        t2, p2 = samples[pos]
        ratio = (target_time - t1) / (t2 - t1)
        return p1 + (p2 - p1) * ratio

    # Samples strictly inside the window; the boundaries are added explicitly.
    first_inside = bisect.bisect_right(times, start_time)
    first_outside = bisect.bisect_left(times, end_time)
    points = [(start_time, interpolate(start_time))]
    points.extend(samples[first_inside:first_outside])
    points.append((end_time, interpolate(end_time)))

    total_energy = 0.0
    for (t1, p1), (t2, p2) in zip(points[:-1], points[1:]):
        total_energy += (p1 + p2) / 2.0 * (t2 - t1)

    return total_energy

#### set the training process

In [52]:
def train_func(net, train_iter, test_iter, num_epochs, lr, device, filename, sampling_interval):
    """Train ``net`` with mixed precision while a background thread logs GPU power.

    Every phase of every batch (host-to-device copy, forward + loss, backward,
    optimizer step) is bracketed by ``torch.cuda.synchronize()`` and recorded
    as a ``(start, end)`` pair of ``time.time()`` stamps. These can be matched
    against the power samples that ``nvml_sampling_thread`` writes to
    ``filename``.

    Args:
        net: Model to train; Linear/Conv2d weights are re-initialised (Xavier).
        train_iter: Iterable of ``(X, y)`` training batches.
        test_iter: Test data passed to ``d2l.evaluate_accuracy_gpu``.
        num_epochs: Number of epochs.
        lr: Learning rate for SGD.
        device: Device the model and the batches are moved to.
        filename: Directory handed to ``nvml_sampling_thread`` for the power CSV.
        sampling_interval: Power sampling period in seconds.

    Returns:
        Tuple ``(to_device, forward, loss, backward, optimize, test, epoch)``.
        Each element has one entry per epoch, and each entry is a list of
        ``(start, end)`` timestamp pairs. The ``loss`` lists are always empty
        because the loss is computed inside the forward interval.
    """
    def init_weights(m):
        if type(m) in (nn.Linear, nn.Conv2d):
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    # Gradient scaler for mixed-precision training.
    scaler = torch.cuda.amp.GradScaler()

    # Per-epoch interval lists collected over the whole run.
    to_device_intervals_total = []
    forward_intervals_total = []
    loss_intervals_total = []
    backward_intervals_total = []
    optimize_intervals_total = []
    test_intervals_total = []
    epoch_intervals_total = []

    # Start NVML and the power-sampling thread. The try/finally blocks make
    # sure the thread is stopped and NVML is shut down even when training
    # fails (e.g. CUDA out-of-memory); otherwise the sampler would keep
    # appending to the CSV for the rest of the kernel's life.
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
        stop_event = threading.Event()
        sampler_thread = threading.Thread(target=nvml_sampling_thread, args=(handle, filename, stop_event, sampling_interval))
        sampler_thread.start()
        try:
            for epoch in range(num_epochs):
                print('The epoch is:', epoch+1)
                metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples
                to_device_intervals_epoch = []  # to_device interval of each batch
                forward_intervals_epoch = []  # forward (+ loss) interval of each batch
                loss_intervals_epoch = []  # stays empty: loss is part of the forward interval
                backward_intervals_epoch = []
                optimize_intervals_epoch = []
                test_intervals_epoch = []
                epoch_intervals_epoch = []  # interval of the whole epoch

                epoch_start_time = time.time()

                net.train()
                for i, (X, y) in enumerate(train_iter):
                    print('The batch is:', i+1)
                    optimizer.zero_grad()
                    torch.cuda.synchronize()

                    # host -> device copy
                    start_ttd_time = time.time()
                    X, y = X.to(device), y.to(device)
                    torch.cuda.synchronize()
                    end_ttd_time = time.time()
                    to_device_intervals_epoch.append((start_ttd_time, end_ttd_time))

                    # forward pass and loss under autocast
                    start_forward_time = time.time()
                    with torch.cuda.amp.autocast():
                        y_hat = net(X)
                        l = loss_fn(y_hat, y)  # loss inside autocast
                    torch.cuda.synchronize()
                    end_forward_time = time.time()
                    forward_intervals_epoch.append((start_forward_time, end_forward_time))

                    # backward pass on the scaled loss
                    start_backward_time = time.time()
                    scaler.scale(l).backward()
                    torch.cuda.synchronize()
                    end_backward_time = time.time()
                    backward_intervals_epoch.append((start_backward_time, end_backward_time))

                    # optimizer step through the scaler
                    start_optimize_time = time.time()
                    scaler.step(optimizer)
                    scaler.update()
                    torch.cuda.synchronize()
                    end_optimize_time = time.time()
                    optimize_intervals_epoch.append((start_optimize_time, end_optimize_time))

                    with torch.no_grad():
                        metric.add(l.item() * X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
                    train_acc = metric[1] / metric[2]

                    # Free the batch's memory.
                    # NOTE(review): empty_cache() on every batch runs inside the
                    # epoch interval - confirm it should be part of the measurement.
                    del X, y, y_hat, l
                    torch.cuda.empty_cache()

                # Evaluation on the test set
                start_test_time = time.time()
                with torch.no_grad():
                    test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
                end_test_time = time.time()
                print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
                test_intervals_epoch.append((start_test_time, end_test_time))

                epoch_end_time = time.time()
                epoch_intervals_epoch.append((epoch_start_time, epoch_end_time))

                # Store this epoch's intervals.
                to_device_intervals_total.append(to_device_intervals_epoch)
                forward_intervals_total.append(forward_intervals_epoch)
                loss_intervals_total.append(loss_intervals_epoch)
                backward_intervals_total.append(backward_intervals_epoch)
                optimize_intervals_total.append(optimize_intervals_epoch)
                test_intervals_total.append(test_intervals_epoch)
                epoch_intervals_total.append(epoch_intervals_epoch)
        finally:
            # Always stop the sampler and wait for it to close the CSV file.
            stop_event.set()
            sampler_thread.join()
    finally:
        pynvml.nvmlShutdown()

    return to_device_intervals_total, forward_intervals_total, loss_intervals_total, backward_intervals_total, optimize_intervals_total, test_intervals_total, epoch_intervals_total

In [53]:
# def train_func(net, train_iter, test_iter, num_epochs, lr, device, filename, sampling_interval):
#     def init_weights(m):
#         if type(m) == nn.Linear or type(m) == nn.Conv2d:
#             nn.init.xavier_uniform_(m.weight)
#     net.apply(init_weights)
#     print('training on', device)
#     net.to(device)
#     # print(f'The name of the layers are: {alexlayer}')
#     optimizer = torch.optim.SGD(net.parameters(), lr=lr)
#     loss_fn = nn.CrossEntropyLoss()

#     scaler = torch.cuda.amp.GradScaler()

#     # save all epochs time data using list
#     to_device_intervals_total = []
#     forward_intervals_total = []
#     loss_intervals_total = []
#     backward_intervals_total = []
#     optimize_intervals_total = []
#     test_intervals_total = []

#     # create a list to store the epoch time data
#     epoch_intervals_total = []
    
#     # 初始化NVML和采样线程
#     pynvml.nvmlInit()
#     handle = pynvml.nvmlDeviceGetHandleByIndex(0)
#     stop_event = threading.Event()
#     sampler_thread = threading.Thread(target=nvml_sampling_thread, args=(handle, filename, stop_event, sampling_interval))
#     sampler_thread.start()

#     for epoch in range(num_epochs):
#         print('The epoch is:', epoch+1)
#         metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples
#         to_device_intervals_epoch = []  # 用来记录本epoch每个batch的to_device时间段
#         forward_intervals_epoch = []  # 用来记录本epoch每个batch的forward时间段
#         loss_intervals_epoch = []  # 用来记录本epoch每个batch的loss时间段
#         backward_intervals_epoch = [] 
#         optimize_intervals_epoch = []
#         test_intervals_epoch = []   
#         epoch_intervals_epoch = []  # 用来记录本epoch的时间段

#         epoch_start_time = time.time()

#         net.train()
#         for i, (X, y) in enumerate(train_iter):
#             print('The batch is:', i+1)
#             optimizer.zero_grad()
#             torch.cuda.synchronize()

#             # 记录to_device前后的时间戳
#             start_ttd_time = time.time()
#             X, y = X.to(device), y.to(device)
#             torch.cuda.synchronize()
#             end_ttd_time = time.time()
#             to_device_intervals_epoch.append((start_ttd_time, end_ttd_time))

#             # forward
#             start_forward_time = time.time()
#             y_hat = net(X)
#             torch.cuda.synchronize()
#             end_forward_time = time.time()
#             forward_intervals_epoch.append((start_forward_time, end_forward_time))

#             # loss
#             start_loss_time = time.time()
#             l = loss_fn(y_hat, y)
#             torch.cuda.synchronize()
#             end_loss_time = time.time()
#             loss_intervals_epoch.append((start_loss_time, end_loss_time))

#             # backward
#             start_backward_time = time.time()
#             l.backward()
#             torch.cuda.synchronize()
#             end_backward_time = time.time()
#             backward_intervals_epoch.append((start_backward_time, end_backward_time))

#             # optimize
#             start_optimize_time = time.time()
#             optimizer.step()
#             torch.cuda.synchronize()
#             end_optimize_time = time.time()
#             optimize_intervals_epoch.append((start_optimize_time, end_optimize_time))

#             with torch.no_grad():
#                 metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
#             train_acc = metric[1] / metric[2]

#             del X, y, y_hat, l
#             torch.cuda.empty_cache()

#         start_test_time = time.time()
#         test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
#         end_test_time = time.time()
#         print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
#         test_intervals_epoch.append((start_test_time, end_test_time))

#         epoch_end_time = time.time()
#         epoch_intervals_epoch.append((epoch_start_time, epoch_end_time))

#         # data need to be saved
#         # add the intervals_epoch to intervals_total
#         to_device_intervals_total.append(to_device_intervals_epoch)
#         forward_intervals_total.append(forward_intervals_epoch)
#         loss_intervals_total.append(loss_intervals_epoch)
#         backward_intervals_total.append(backward_intervals_epoch)
#         optimize_intervals_total.append(optimize_intervals_epoch)
#         test_intervals_total.append(test_intervals_epoch)
#         epoch_intervals_total.append(epoch_intervals_epoch)


#     # 训练结束后关闭线程
#     stop_event.set()
#     sampler_thread.join()

#     pynvml.nvmlShutdown()

#     return to_device_intervals_total, forward_intervals_total, loss_intervals_total, backward_intervals_total, optimize_intervals_total, test_intervals_total, epoch_intervals_total

## Start to train the model

### define a function to train the model on the FashionMNIST dataset

In [54]:
def train_model_f(main_folder, batch_size, num_epochs, round, lr, device, sample_interval, net):
    """Train ``net`` on Fashion-MNIST and save the recorded timing intervals.

    Results go to
    ``<main_folder>/E{num_epochs}_B{batch_size}_R{round}_SR{ms}/fashion_mnist``,
    where ``ms`` is ``sample_interval`` in whole milliseconds. ``train_func``
    writes the power CSV there and this function adds one ``.npy`` file per
    training phase.

    Args:
        main_folder: Base output directory (``str`` or ``Path``).
        batch_size: Batch size for the data loaders.
        num_epochs: Number of training epochs.
        round: Zero-based index of this repetition (used in the folder name).
        lr: Learning rate.
        device: Device to train on.
        sample_interval: Power sampling period in seconds.
        net: Model to train.
    """
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    sr_number = int(sample_interval*1000)
    epoch_batch_folder = f'E{num_epochs}_B{batch_size}_R{round}_SR{sr_number}'
    dataset_dir = 'fashion_mnist'

    # Path(...) lets callers pass a plain string; "str / str" would raise TypeError.
    folder_path = Path(main_folder) / epoch_batch_folder / dataset_dir
    print(f'The folder path is: {folder_path}')
    folder_path.mkdir(parents=True, exist_ok=True)

    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
    # Show the shape of the first three batches only; the number of batches
    # comes from len(), so the whole dataset is not loaded just to count.
    for i, (X, y) in enumerate(train_iter):
        if i >= 3:
            break
        print('the shape of the', i, 'batch of the train_iter is:', X.shape)
    print(f'The number of batches is: {(len(train_iter),)}')
    to_device_intervals_total, forward_intervals_total, loss_intervals_total,\
          backward_intervals_total, optimize_intervals_total, test_intervals_total, epoch_intervals_total = train_func(net, train_iter, test_iter, num_epochs, lr, device, folder_path, sample_interval)

    # File name -> recorded intervals; each is converted to a numpy array and saved.
    interval_files = {
        'to_device.npy': to_device_intervals_total,
        'forward.npy': forward_intervals_total,
        'loss.npy': loss_intervals_total,
        'backward.npy': backward_intervals_total,
        'optimize.npy': optimize_intervals_total,
        'test.npy': test_intervals_total,
        'epoch.npy': epoch_intervals_total,
    }
    for file_name, intervals in interval_files.items():
        np.save(folder_path/file_name, np.array(intervals), allow_pickle=True)


### run the model

In [1]:
sampling_interval = 0.002 # 2ms
# Dataset sub-folder names; index 0 (Fashion-MNIST) is the one used below.
datasets = ['fashion_mnist', 'cifar100', 'cifar10']

for i in range(len(models_f_list)):
    # Release cached GPU memory before each model starts.
    torch.cuda.empty_cache()
    net = models_f_list[i]
    # Keep main_folder a Path: os.path.join would return a str, which has no
    # .exists() and cannot be joined with "/" inside train_model_f.
    # NOTE(review): train_model_f appends its own 'fashion_mnist' sub-folder, so
    # the dataset name appears twice in the final path - confirm this is intended.
    main_folder = Path(DataList[i]) / datasets[0]
    # main_folder = Path(DataList[i]) / datasets[1]
    # main_folder = Path(DataList[i]) / datasets[2]

    print('The folder is:', main_folder)
    if main_folder.exists():
        print("文件存在。")
    else:
        main_folder.mkdir(parents=True, exist_ok=True)
        print("文件不存在，已创建。")
        print("文件创建于：", main_folder)
    for epoch in epochs:
        for batch in batch_size:
            # round_idx rather than round, so the builtin round() is not
            # overwritten in the notebook namespace.
            for round_idx in range(rounds):
                train_model_f(main_folder, batch, epoch, round_idx, lr, device, sampling_interval, net)

NameError: name 'models_f_list' is not defined

In [56]:
# sampling_interval = 0.002 # 2ms
# # # create the folder to store the data

# # for i in range(len(models_f_list)):
# # 2024.12.18-yj: 由于之前的运行 gpu 内存爆了,需要继续后续训练
# # 2024.12.19-yj: mobilenetv2 持续爆内存,需要继续后续训练
# torch.cuda.reset_peak_memory_stats()
# torch.cuda.empty_cache()
# # for each start, clear the memory in the gpu using the torch
# # print(models_f_list[-1])
# net = mobilenetv2_f
# main_folder = DataList[-1]  
# print('The folder is:', main_folder)
# if main_folder.exists():
#     print("文件存在。")
# else:
#     os.makedirs(main_folder)
#     print("文件不存在，已创建。")
#     print("文件创建于：", main_folder)
# for epoch in epochs:
#     for batch in batch_size:
#         for round in range(rounds):
#             train_model_f(main_folder, batch, epoch, round, lr, device, sampling_interval, net)

The folder is: /root/autodl-tmp/GreenAI/3080/ModelsData/mobilenetv2_path
文件存在。
The epoch is set: 5, batch is set: 256, is in 1th running
The folder path is: /root/autodl-tmp/GreenAI/3080/ModelsData/mobilenetv2_path/E5_B256_R0_SR2
the shape of the 0 batch of the train_iter is: torch.Size([256, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([256, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([256, 1, 224, 224])
The number of batches is: (235,)
training on cuda
The epoch is: 1


  scaler = torch.cuda.amp.GradScaler()


The batch is: 1


  with torch.cuda.amp.autocast():


The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batch is: 23
The batch is: 24
The batch is: 25
The batch is: 26
The batch is: 27
The batch is: 28
The batch is: 29
The batch is: 30
The batch is: 31
The batch is: 32
The batch is: 33
The batch is: 34
The batch is: 35
The batch is: 36
The batch is: 37
The batch is: 38
The batch is: 39
The batch is: 40
The batch is: 41
The batch is: 42
The batch is: 43
The batch is: 44
The batch is: 45
The batch is: 46
The batch is: 47
The batch is: 48
The batch is: 49
The batch is: 50
The batch is: 51
The batch is: 52
The batch is: 53
The batch is: 54
The batch is: 55
The batch is: 56
The batch is: 57
The batch is: 58
The batch is: 59
The batch is: 60
The b