In [1]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from pathlib import Path
import subprocess
import os
import time
from pynvml import *
import threading
import queue

### Path

In [2]:
# Locate the directories this notebook works with, relative to where the kernel runs.

# Working directory of the kernel (kept as a plain string).
current_path = os.getcwd()
# Parent of the working directory, as a pathlib.Path.
parent_path = Path(current_path).parent
# Data directory: the fixed '3080/AlexNet_test_data' folder below the parent.
data_path = parent_path.joinpath('3080/AlexNet_test_data')

# Report all three locations.
print('The current path is:', current_path)
print('The parent path is:', parent_path)
print('The data path is:', data_path)

The current path is: /home/GreenAI/3080
The parent path is: /home/GreenAI
The data path is: /home/GreenAI/3080/AlexNet_test_data


### Model

In [3]:
def alexnet(img_channel, num_labels):
    """Build an AlexNet-style CNN as one flat ``nn.Sequential``.

    Args:
        img_channel: number of channels of the input images
            (``in_channels`` of the first convolution).
        num_labels: number of classes (``out_features`` of the last linear layer).

    Returns:
        An ``nn.Sequential`` holding 22 layers: the convolutional feature
        extractor, adaptive pooling + flatten, and a three-layer classifier.
    """
    feature_layers = [
        # A large 11x11 window captures objects; stride 4 shrinks the output
        # height and width. Far more output channels than LeNet.
        nn.Conv2d(img_channel, 64, kernel_size=11, stride=4, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Smaller window; padding 2 keeps height/width unchanged while the
        # number of output channels grows.
        nn.Conv2d(64, 192, kernel_size=5, padding=2),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Three consecutive convolutions with small windows and no pooling in
        # between; only the pooling after the last one reduces height/width.
        nn.Conv2d(192, 384, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # Average-pool every channel to a 6x6 grid so the flattened size
        # matches the 256 * 6 * 6 inputs of the first linear layer.
        nn.AdaptiveAvgPool2d((6, 6)),
        nn.Flatten(),
    ]
    classifier_layers = [
        # Wide fully connected layers; dropout mitigates overfitting.
        nn.Linear(256 * 6 * 6, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        # Output layer: one logit per label.
        nn.Linear(4096, num_labels),
    ]
    return nn.Sequential(*feature_layers, *classifier_layers)

In [4]:
# Instantiate the 1-channel / 10-class model and collect the distinct layer types it uses.
net = alexnet(1, 10)
layer_name = set()
for layer in net:
    name = type(layer).__name__
    layer_name.add(name)
# Sort the unique type names so their order is stable.
layer_name = sorted(layer_name)
print('The layer name is:', layer_name)
# Count of distinct layer types, i.e.
# ['AdaptiveAvgPool2d', 'Conv2d', 'Dropout', 'Flatten', 'Linear', 'MaxPool2d', 'ReLU']
num_layers = len(layer_name)
print('The number of layers is:', num_layers)

The layer name is: ['AdaptiveAvgPool2d', 'Conv2d', 'Dropout', 'Flatten', 'Linear', 'MaxPool2d', 'ReLU']
The number of layers is: 7


In [5]:
# Build a fresh model and give every layer a short positional label:
# the first four characters of its class name plus its 1-based position.
AlexNet = alexnet(1, 10)
alexlayer = []  # per-layer labels such as 'Conv1', 'ReLU2', ...
DNNlayer = []   # full class name of every layer, in order
for num, layer in enumerate(AlexNet, start=1):
    name = type(layer).__name__
    # Note: this rebinds `layer_name` to a string on every iteration.
    layer_name = f'{name[:4]}{num}'
    DNNlayer.append(name)
    alexlayer.append(layer_name)
# Unique class names in sorted (stable) order.
DNNlayer_org = sorted(set(DNNlayer))
print('The layer name is:', alexlayer)
print('The layer name after orged is:', DNNlayer_org)
# Total number of layers in the sequential model (one label per layer).
alexlayer_num = len(alexlayer)
print('The number of layers is:', alexlayer_num)

The layer name is: ['Conv1', 'ReLU2', 'MaxP3', 'Conv4', 'ReLU5', 'MaxP6', 'Conv7', 'ReLU8', 'Conv9', 'ReLU10', 'Conv11', 'ReLU12', 'MaxP13', 'Adap14', 'Flat15', 'Line16', 'ReLU17', 'Drop18', 'Line19', 'ReLU20', 'Drop21', 'Line22']
The layer name after orged is: ['AdaptiveAvgPool2d', 'Conv2d', 'Dropout', 'Flatten', 'Linear', 'MaxPool2d', 'ReLU']
The number of layers is: 22


Build a separate AlexNet model for each dataset.

In [6]:
# Each dataset needs its own img_channel and num_labels.
# Fashion-MNIST images have 1 channel and there are 10 classes.
alexnet_f = alexnet(1, 10)
'''
# CIFAR100中的图像通道数为3，类别数为100
alexnet_c = alexnet(3, 100)
# CIFAR10中的图像通道数为3，类别数为10
alexnet_c10 = alexnet(3, 10)
'''

'\n# CIFAR100中的图像通道数为3，类别数为100\nalexnet_c = alexnet(3, 100)\n# CIFAR10中的图像通道数为3，类别数为10\nalexnet_c10 = alexnet(3, 10)\n'

In [7]:
# Fashion-MNIST model: ptflops complexity report for one 1x224x224 input.
with torch.cuda.device(0):
    macs_f, params_f = get_model_complexity_info(
        alexnet_f,
        (1, 224, 224),
        as_strings=True,
        print_per_layer_stat=True,
        verbose=True,
    )
    # Two aligned summary rows: total MACs, then total parameter count.
    for label, value in (('Computational complexity: ', macs_f),
                         ('Number of parameters: ', params_f)):
        print('{:<30}  {:<8}'.format(label, value))
'''
print('*'*50)

# cifar100
with torch.cuda.device(0):
    macs_c, params_c = get_model_complexity_info(alexnet_c, (3, 224, 224), as_strings=True,
                                            print_per_layer_stat=True, verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs_c))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params_c))
    
# cifar10
with torch.cuda.device(0):
    macs_c10, params_c10 = get_model_complexity_info(alexnet_c10, (3, 224, 224), as_strings=True,
                                            print_per_layer_stat=True, verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs_c10))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params_c10))
'''

Sequential(
  57.03 M, 100.000% Params, 664.65 MMac, 99.868% MACs, 
  (0): Conv2d(7.81 k, 0.014% Params, 23.62 MMac, 3.549% MACs, 1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(0, 0.000% Params, 193.6 KMac, 0.029% MACs, )
  (2): MaxPool2d(0, 0.000% Params, 193.6 KMac, 0.029% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(307.39 k, 0.539% Params, 224.09 MMac, 33.671% MACs, 64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(0, 0.000% Params, 139.97 KMac, 0.021% MACs, )
  (5): MaxPool2d(0, 0.000% Params, 139.97 KMac, 0.021% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(663.94 k, 1.164% Params, 112.21 MMac, 16.859% MACs, 192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(0, 0.000% Params, 64.9 KMac, 0.010% MACs, )
  (8): Conv2d(884.99 k, 1.552% Params, 149.56 MMac, 22.473% MACs, 384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): R

"\nprint('*'*50)\n\n# cifar100\nwith torch.cuda.device(0):\n    macs_c, params_c = get_model_complexity_info(alexnet_c, (3, 224, 224), as_strings=True,\n                                            print_per_layer_stat=True, verbose=True)\n    print('{:<30}  {:<8}'.format('Computational complexity: ', macs_c))\n    print('{:<30}  {:<8}'.format('Number of parameters: ', params_c))\n    \n# cifar10\nwith torch.cuda.device(0):\n    macs_c10, params_c10 = get_model_complexity_info(alexnet_c10, (3, 224, 224), as_strings=True,\n                                            print_per_layer_stat=True, verbose=True)\n    print('{:<30}  {:<8}'.format('Computational complexity: ', macs_c10))\n    print('{:<30}  {:<8}'.format('Number of parameters: ', params_c10))\n"

### Datasets

In [8]:
# Random single-image batch laid out as (batch, channel, height, width) = (1, 1, 224, 224).
X_f = torch.randn(1, 1, 224, 224, dtype=torch.float32)
'''
X_c = torch.randn(size=(1, 3, 224, 224), dtype=torch.float32) # cifar100
x_c10 = torch.randn(size=(1, 3, 224, 224), dtype=torch.float32) # cifar10
'''

# Push the batch through the network one layer at a time and report each output shape.
for layer in alexnet_f:
    X_f = layer(X_f)
    print(type(layer).__name__, 'output shape:\t', X_f.shape)

'''
print('*'*50)

for layer in alexnet_c:
    X_c=layer(X_c)
    print(layer.__class__.__name__,'output shape:\t',X_c.shape)
    
print('*'*50)

for layer in alexnet_c10:
    x_c10=layer(x_c10)
    print(layer.__class__.__name__,'output shape:\t',x_c10.shape)
'''

Conv2d output shape:	 torch.Size([1, 64, 55, 55])
ReLU output shape:	 torch.Size([1, 64, 55, 55])
MaxPool2d output shape:	 torch.Size([1, 64, 27, 27])
Conv2d output shape:	 torch.Size([1, 192, 27, 27])
ReLU output shape:	 torch.Size([1, 192, 27, 27])
MaxPool2d output shape:	 torch.Size([1, 192, 13, 13])
Conv2d output shape:	 torch.Size([1, 384, 13, 13])
ReLU output shape:	 torch.Size([1, 384, 13, 13])
Conv2d output shape:	 torch.Size([1, 256, 13, 13])
ReLU output shape:	 torch.Size([1, 256, 13, 13])
Conv2d output shape:	 torch.Size([1, 256, 13, 13])
ReLU output shape:	 torch.Size([1, 256, 13, 13])
MaxPool2d output shape:	 torch.Size([1, 256, 6, 6])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 256, 6, 6])
Flatten output shape:	 torch.Size([1, 9216])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:

"\nprint('*'*50)\n\nfor layer in alexnet_c:\n    X_c=layer(X_c)\n    print(layer.__class__.__name__,'output shape:\t',X_c.shape)\n    \nprint('*'*50)\n\nfor layer in alexnet_c10:\n    x_c10=layer(x_c10)\n    print(layer.__class__.__name__,'output shape:\t',x_c10.shape)\n"

In [9]:
# load the data
# fashion mnist
def get_dataloader_workers():
    """Return the number of worker processes used for data loading.

    The value is fixed at 4.
    """
    num_workers = 4
    return num_workers

def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST and wrap both splits in data loaders.

    Args:
        batch_size: batch size handed to both ``DataLoader`` objects.
        resize: optional size passed to ``transforms.Resize``; when given,
            the resize runs before ``ToTensor``. Skipped when falsy.

    Returns:
        ``(train_loader, test_loader)``. Only the training loader shuffles;
        both use ``get_dataloader_workers()`` worker processes.
    """
    # Optional resize first, tensor conversion always last.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # Both splits live under the same root and are downloaded on demand.
    shared_kwargs = dict(root="../data", transform=pipeline, download=True)
    mnist_train = torchvision.datasets.FashionMNIST(train=True, **shared_kwargs)
    mnist_test = torchvision.datasets.FashionMNIST(train=False, **shared_kwargs)

    train_loader = torch.utils.data.DataLoader(
        mnist_train, batch_size, shuffle=True,
        num_workers=get_dataloader_workers())
    test_loader = torch.utils.data.DataLoader(
        mnist_test, batch_size, shuffle=False,
        num_workers=get_dataloader_workers())
    return (train_loader, test_loader)
'''
def load_data_cifar100(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_utils`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # import the cifar100 dataset
    cifar_train = torchvision.datasets.CIFAR100(
        root="../data", train=True, transform=trans, download=True)
    cifar_test = torchvision.datasets.CIFAR100(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,
                                        num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,
                                        num_workers=get_dataloader_workers()))
    
def load_data_cifar10(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_utils`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # import the cifar100 dataset
    cifar_train = torchvision.datasets.CIFAR10(
        root="../data", train=True, transform=trans, download=True)
    cifar_test = torchvision.datasets.CIFAR10(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,
                                        num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,
                                        num_workers=get_dataloader_workers()))
'''
    

'\ndef load_data_cifar100(batch_size, resize=None):\n    """Download the Fashion-MNIST dataset and then load it into memory.\n\n    Defined in :numref:`sec_utils`"""\n    trans = [transforms.ToTensor()]\n    if resize:\n        trans.insert(0, transforms.Resize(resize))\n    trans = transforms.Compose(trans)\n    # import the cifar100 dataset\n    cifar_train = torchvision.datasets.CIFAR100(\n        root="../data", train=True, transform=trans, download=True)\n    cifar_test = torchvision.datasets.CIFAR100(\n        root="../data", train=False, transform=trans, download=True)\n    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,\n                                        num_workers=get_dataloader_workers()),\n            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,\n                                        num_workers=get_dataloader_workers()))\n    \ndef load_data_cifar10(batch_size, resize=None):\n    """Download the Fashion-MNIST dataset

### Parameters

In [10]:
# Experiment settings; the code that consumes them is not part of this cell.
batch_size = [256]  # list of batch sizes, currently a single value
# epochs = [10, 20, 30, 40, 50, 80, 100]
epochs = [1]  # list of epoch counts; the longer sweep above is disabled
rounds = 1  # presumably the number of repetitions per setting — TODO confirm

### Timer

### Train Model

In [11]:
'''
def train_func(net, train_iter, test_iter, alexlayer, num_epochs, lr, device):
    def init_weights(m): # 初始化权重
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    # create a ndarray to store each layer's total running time of each epoch
    Layers_time = np.zeros((len(alexlayer), num_epochs)) # each row is a layer, each column is an epoch
    print(f'The name of the layers are: {alexlayer}')
    Train_part_time = np.zeros((6, num_epochs)) # store the time to device, forward and backward time, and test time of each epoch
    Train_time = np.zeros(num_epochs) # store the total training time of each epoch
    Train_acc = np.zeros(num_epochs) # store the training accuracy of each epoch
    Test_acc = np.zeros(num_epochs) # store the test accuracy of each epoch
    Epoch_time = np.zeros(num_epochs) # store the total time of each epoch
    Epoch_energy = np.zeros((num_epochs,1), dtype='object') # store the total energy of each epoch
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    timer = d2l.Timer()
    train_timer = d2l.Timer()
    ttd_timer = d2l.Timer()
    forward_timer = d2l.Timer()
    loss_timer = d2l.Timer()
    backward_timer = d2l.Timer()
    opt_timer = d2l.Timer()
    layer_timer = d2l.Timer()
    test_timer = d2l.Timer()
    # start training
    for epoch in range(num_epochs):
        print('The epoch is:', epoch+1)
        timer.start()
        net.train()
        train_epoch, ttd_epoch, forward_epoch, loss_epoch, backward_epoch, opt_epoch, testtime_epoch= 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
        layer_epoch = np.zeros((len(alexlayer), 1)) # store the total running time of each layer in one epoch
        metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples   
        # start the nvidia-smi command
        with open('gpu_power_usage.csv', 'w') as file:
            # Start the nvidia-smi command
            nvidia_smi_process = subprocess.Popen(
                ["nvidia-smi", "--query-gpu=power.draw", "--format=csv", "--loop-ms=10"],
                stdout=file,  # Redirect the output directly to the file
                stderr=subprocess.PIPE,
                text=True)
        train_timer.start()
        for i, (X, y) in enumerate(train_iter):
            lnamenum = 0
            print('The batch is:', i+1)
            optimizer.zero_grad()
            # to device
            torch.cuda.synchronize()  # 等待数据传输完成
            ttd_timer.start()
            # sleep for 2 seconds
            time.sleep(2)
            X, y = X.to(device), y.to(device)
            torch.cuda.synchronize()  # 等待数据传输完成
            # sleep for 2 seconds
            time.sleep(2)
            ttd_epoch += ttd_timer.stop()
            # forward
            forward_timer.start()
            y_hat = X
            for layer in net:
                name = layer.__class__.__name__ # 获取层的名字
                lnamenum += 1
                lname = name[:4] + str(lnamenum)
                layer_index = alexlayer.index(lname)
                layer_timer.start()
                y_hat = layer(y_hat)
                torch.cuda.synchronize()  # 等待数据传输完成
                layer_epoch[layer_index] += layer_timer.stop()
            torch.cuda.synchronize()  # 等待数据传输完成
            forward_epoch += forward_timer.stop()
            # loss
            loss_timer.start()
            l = loss_fn(y_hat, y)
            # backward
            torch.cuda.synchronize()  # 等待数据传输完成
            loss_epoch += loss_timer.stop()
            backward_timer.start()
            l.backward()
            torch.cuda.synchronize()  # 等待数据传输完成
            backward_epoch += backward_timer.stop()
            # optimize
            opt_timer.start()   
            optimizer.step()
            torch.cuda.synchronize()  # 等待数据传输完成
            opt_epoch += opt_timer.stop()
            with torch.no_grad():
                metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            train_acc = metric[1] / metric[2]
        train_epoch = train_timer.stop()
        test_timer.start()
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        testtime_epoch = test_timer.stop()
        print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
        print('epoch %d, time %f sec' % (epoch+1, timer.sum()))
        # store the time and acc data
        Epoch_time[epoch] = timer.stop()
        print(f'The total time of the {epoch} is:', Epoch_time[epoch])
        Layers_time[:, epoch] = layer_epoch.flatten()
        Train_part_time[:, epoch] = ttd_epoch, forward_epoch, loss_epoch, backward_epoch, opt_epoch, testtime_epoch
        Train_time[epoch] = train_epoch
        Train_acc[epoch] = train_acc
        Test_acc[epoch] = test_acc
        # stop the nvidia-smi command
        nvidia_smi_process.terminate()
        # calculate the energy consumption of each epoch
        GPU_df = pd.read_csv('gpu_power_usage.csv')
        for row in range(len(GPU_df)):
            GPU_df.iloc[row,0] = GPU_df.iloc[row,0].replace(' W','')
        Consumption_df = GPU_df.astype(float)  
        EnergyDatai = Consumption_df.iloc[:,0].values # 将数据转换为numpy数组
        # store the energy data
        Epoch_energy[epoch,0] = EnergyDatai
    return Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, Epoch_time, Epoch_energy
'''

'\ndef train_func(net, train_iter, test_iter, alexlayer, num_epochs, lr, device):\n    def init_weights(m): # 初始化权重\n        if type(m) == nn.Linear or type(m) == nn.Conv2d:\n            nn.init.xavier_uniform_(m.weight)\n    net.apply(init_weights)\n    print(\'training on\', device)\n    net.to(device)\n    # create a ndarray to store each layer\'s total running time of each epoch\n    Layers_time = np.zeros((len(alexlayer), num_epochs)) # each row is a layer, each column is an epoch\n    print(f\'The name of the layers are: {alexlayer}\')\n    Train_part_time = np.zeros((6, num_epochs)) # store the time to device, forward and backward time, and test time of each epoch\n    Train_time = np.zeros(num_epochs) # store the total training time of each epoch\n    Train_acc = np.zeros(num_epochs) # store the training accuracy of each epoch\n    Test_acc = np.zeros(num_epochs) # store the test accuracy of each epoch\n    Epoch_time = np.zeros(num_epochs) # store the total time of each epoch\

In [12]:
def nvml_sampling_thread(handle, data_queue, stop_event, sampling_interval=0.01):
    """Periodically read the GPU power draw through NVML (thread target).

    Runs until ``stop_event`` is set, pushing one sample per iteration.

    Args:
        handle: NVML device handle, e.g. the result of
            ``nvmlDeviceGetHandleByIndex(0)``.
        data_queue: queue that receives ``(timestamp, power_in_watts)``
            tuples; the timestamp comes from ``time.time()``.
        stop_event: ``threading.Event``; the loop ends once it is set.
        sampling_interval: pause between two samples, in seconds.
    """
    while not stop_event.is_set():
        current_time = time.time()
        current_power = nvmlDeviceGetPowerUsage(handle) / 1000.0  # mW -> W
        data_queue.put((current_time, current_power))
        # Wait on the event rather than sleeping: the pause has the same
        # length, but a stop request interrupts it immediately instead of
        # being noticed only after the full interval has elapsed.
        stop_event.wait(sampling_interval)

In [13]:
'''
def train_func(net, train_iter, test_iter, alexlayer, num_epochs, lr, device):
    def init_weights(m): # 初始化权重
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    # create a ndarray to store each layer's total running time of each epoch
    # Layers_time = np.zeros((len(alexlayer), num_epochs)) # each row is a layer, each column is an epoch
    print(f'The name of the layers are: {alexlayer}')
    # Train_part_time = np.zeros((6, num_epochs)) # store the time to device, forward and backward time, and test time of each epoch
    # Train_time = np.zeros(num_epochs) # store the total training time of each epoch
    # Epoch_time = np.zeros(num_epochs) # store the total time of each epoch
    # Epoch_energy = np.zeros((num_epochs,1), dtype='object') # store the total energy of each epoch
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    timer = d2l.Timer()
    # train_timer = d2l.Timer()
    ttd_timer = d2l.Timer()
    # forward_timer = d2l.Timer()
    # loss_timer = d2l.Timer()
    # backward_timer = d2l.Timer()
    # opt_timer = d2l.Timer()
    # layer_timer = d2l.Timer()
    # test_timer = d2l.Timer()
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    # 创建一个队列来存储功率数据
    power_data_queue = queue.Queue()

    # 创建一个线程停止事件
    stop_event = threading.Event()

    # 启动采样线程，每10ms获取一次功耗数据
    sampling_interval = 0.005

    sampler_thread1 = threading.Thread(target=nvml_sampling_thread, args=(handle, power_data_queue, stop_event, sampling_interval))
    start_time = time.time()
    ttd_total = []
    # start training
    for epoch in range(num_epochs):
        ttd_epoch = 0.0
        print('The epoch is:', epoch+1)
        timer.start()
        net.train()
        # train_epoch, forward_epoch, loss_epoch, backward_epoch, opt_epoch, testtime_epoch= 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
        # layer_epoch = np.zeros((len(alexlayer), 1)) # store the total running time of each layer in one epoch
        metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples
        ##########################################################################################################   
        # start the nvidia-smi command
        # with open('gpu_power_usage.csv', 'w') as file:
        #     # Start the nvidia-smi command
        #     nvidia_smi_process = subprocess.Popen(
        #         ["nvidia-smi", "--query-gpu=power.draw", "--format=csv", "--loop-ms=10"],
        #         stdout=file,  # Redirect the output directly to the file
        #         stderr=subprocess.PIPE,
        #         text=True)
        ##########################################################################################################
        # train_timer.start()
        for i, (X, y) in enumerate(train_iter):
            # lnamenum = 0
            print('The batch is:', i+1)
            optimizer.zero_grad()
            # to device
            torch.cuda.synchronize()  # 等待数据传输完成
            ttd_timer.start()
            sampler_thread1.start()
            # start_power = nvmlDeviceGetPowerUsage(handle) / 1000.0
            X, y = X.to(device), y.to(device)
            torch.cuda.synchronize()  # 等待数据传输完成
            duration = ttd_timer.stop()
            # 训练结束后，通知线程停止并等待其结束
            stop_event.set()
            sampler_thread1.join()
            # end_power = nvmlDeviceGetPowerUsage(handle) / 1000.0
            # avg_power = (start_power + end_power) / 2.0
            # energy_joules = avg_power * duration
            # print(f"Data transfer took {duration:.6f}s, Estimated Energy: {energy_joules:.6f}J")
            # forward
            # forward_timer.start()
            y_hat = net(X)
            # for layer in net:
            #     name = layer.__class__.__name__ # 获取层的名字
            #     lnamenum += 1
            #     lname = name[:4] + str(lnamenum)
            #     layer_index = alexlayer.index(lname)
            #     layer_timer.start()
            #     y_hat = layer(y_hat)
            #     torch.cuda.synchronize()  # 等待数据传输完成
            #     layer_epoch[layer_index] += layer_timer.stop()
            torch.cuda.synchronize()  # 等待数据传输完成
            # forward_epoch += forward_timer.stop()
            # loss
            # loss_timer.start()
            l = loss_fn(y_hat, y)
            # backward
            torch.cuda.synchronize()  # 等待数据传输完成
            # loss_epoch += loss_timer.stop()
            # backward_timer.start()
            l.backward()
            torch.cuda.synchronize()  # 等待数据传输完成
            # backward_epoch += backward_timer.stop()
            # optimize
            # opt_timer.start()   
            optimizer.step()
            torch.cuda.synchronize()  # 等待数据传输完成
            # opt_epoch += opt_timer.stop()
            with torch.no_grad():
                metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            train_acc = metric[1] / metric[2]
        # train_epoch = train_timer.stop()
        # test_timer.start()
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        # testtime_epoch = test_timer.stop()
        print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
        print('epoch %d, time %f sec' % (epoch+1, timer.sum()))
        # store the time and acc data
        # Epoch_time[epoch] = timer.stop()
        # print(f'The total time of the {epoch} is:', Epoch_time[epoch])
        # Layers_time[:, epoch] = layer_epoch.flatten()
        # Train_part_time[:, epoch] = ttd_epoch, forward_epoch, loss_epoch, backward_epoch, opt_epoch, testtime_epoch
        # Train_time[epoch] = train_epoch
        # Train_acc[epoch] = train_acc
        # Test_acc[epoch] = test_acc
        # stop the nvidia-smi command
        # nvidia_smi_process.terminate()
        # calculate the energy consumption of each epoch
        # GPU_df = pd.read_csv('gpu_power_usage.csv')
        # for row in range(len(GPU_df)):
        #     GPU_df.iloc[row,0] = GPU_df.iloc[row,0].replace(' W','')
        # Consumption_df = GPU_df.astype(float)  
        # EnergyDatai = Consumption_df.iloc[:,0].values # 将数据转换为numpy数组
        # store the energy data
        # Epoch_energy[epoch,0] = EnergyDatai
    # 关闭 NVML
    nvmlShutdown()
    # return Layers_time, Train_part_time, Train_time, Epoch_time
'''

'\ndef train_func(net, train_iter, test_iter, alexlayer, num_epochs, lr, device):\n    def init_weights(m): # 初始化权重\n        if type(m) == nn.Linear or type(m) == nn.Conv2d:\n            nn.init.xavier_uniform_(m.weight)\n    net.apply(init_weights)\n    print(\'training on\', device)\n    net.to(device)\n    # create a ndarray to store each layer\'s total running time of each epoch\n    # Layers_time = np.zeros((len(alexlayer), num_epochs)) # each row is a layer, each column is an epoch\n    print(f\'The name of the layers are: {alexlayer}\')\n    # Train_part_time = np.zeros((6, num_epochs)) # store the time to device, forward and backward time, and test time of each epoch\n    # Train_time = np.zeros(num_epochs) # store the total training time of each epoch\n    # Epoch_time = np.zeros(num_epochs) # store the total time of each epoch\n    # Epoch_energy = np.zeros((num_epochs,1), dtype=\'object\') # store the total energy of each epoch\n    optimizer = torch.optim.SGD(net.parameters

In [14]:
def nvml_sampling_thread(handle, data_queue, stop_event, sampling_interval=0.01):
    """Periodically read the GPU power draw through NVML (thread target).

    NOTE: a function with this name is also defined in an earlier cell; when
    the cells run in order, this later definition is the one in effect.

    Args:
        handle: NVML device handle, e.g. the result of
            ``nvmlDeviceGetHandleByIndex(0)``.
        data_queue: queue that receives ``(timestamp, power_in_watts)``
            tuples; the timestamp comes from ``time.time()``.
        stop_event: ``threading.Event``; the loop ends once it is set.
        sampling_interval: pause between two samples, in seconds.
    """
    while not stop_event.is_set():
        current_time = time.time()
        current_power = nvmlDeviceGetPowerUsage(handle) / 1000.0  # mW -> W
        data_queue.put((current_time, current_power))
        # Wait on the event rather than sleeping: the pause has the same
        # length, but a stop request interrupts it immediately instead of
        # being noticed only after the full interval has elapsed.
        stop_event.wait(sampling_interval)

# def integrate_power_over_interval(samples, start_time, end_time):
#     # 对 [start_time, end_time] 区间内的功率样本进行积分（简单线性近似）
#     # samples 是 (time, power) 的列表，假定按时间排序
#     # 过滤出在 start_time 之前和 end_time 之后的样本
#     filtered = [(t, p) for t, p in samples if start_time <= t <= end_time]
#     if len(filtered) < 2:
#         # 样本过少，无法良好估计，这时可以尝试从samples中找到距离start和end最近的点
#         # 简化处理：返回0或尝试扩展为插值
#         return 0.0

def integrate_power_over_interval(samples, start_time, end_time):
    """Integrate sampled power over ``[start_time, end_time]`` (trapezoidal rule).

    The power at both window edges is always obtained by linear interpolation
    between the neighbouring samples, so the partial segments before the first
    and after the last in-window sample are part of the result. (Previously
    those edge segments were dropped whenever two or more samples fell inside
    the window, which underestimated the energy.)

    Args:
        samples: sequence of ``(time, power)`` pairs, assumed to be sorted by
            ascending time (sort beforehand if that is not guaranteed).
        start_time: start of the integration window, same clock as the samples.
        end_time: end of the integration window.

    Returns:
        The integral of power over the window as a float (joules if power is
        in watts and time in seconds). ``0.0`` is returned when there are no
        samples or when the window is empty or inverted
        (``end_time <= start_time``).
    """
    import bisect  # local import: not among the notebook's top-level imports

    # No samples, or an empty/inverted window: nothing to integrate.
    if len(samples) == 0 or end_time <= start_time:
        return 0.0

    # Built once and shared by both boundary look-ups.
    times = [t for t, _ in samples]

    def power_at(target_time):
        """Power at ``target_time``: linear interpolation, clamped outside the samples."""
        # Outside the sampled range there is no neighbour on one side, so the
        # nearest sample's power is used as-is.
        if target_time <= times[0]:
            return samples[0][1]
        if target_time >= times[-1]:
            return samples[-1][1]
        # times[pos - 1] < target_time <= times[pos], hence t2 > t1 below and
        # the division is safe.
        pos = bisect.bisect_left(times, target_time)
        t1, p1 = samples[pos - 1]
        t2, p2 = samples[pos]
        ratio = (target_time - t1) / (t2 - t1)
        return p1 + (p2 - p1) * ratio

    # Samples that fall inside the window ...
    points = [(t, p) for t, p in samples if start_time <= t <= end_time]
    # ... extended with interpolated edge points unless a sample already sits
    # exactly on the edge.
    if not points or points[0][0] > start_time:
        points.insert(0, (start_time, power_at(start_time)))
    if points[-1][0] < end_time:
        points.append((end_time, power_at(end_time)))

    # Trapezoidal rule over consecutive points.
    total_energy = 0.0
    for (t1, p1), (t2, p2) in zip(points, points[1:]):
        total_energy += (p1 + p2) / 2.0 * (t2 - t1)
    return total_energy
    

def _drain_power_queue(power_data_queue):
    """Remove and return every item currently waiting in ``power_data_queue``.

    Items are returned in FIFO order. The loop stops at the first moment the
    queue is observed empty, so samples enqueued afterwards stay in the queue.
    """
    drained = []
    while True:
        try:
            drained.append(power_data_queue.get_nowait())
        except queue.Empty:
            return drained


def _report_phase_energy(epoch_number, phase, intervals, samples):
    """Print the integrated energy of every ``(start, end)`` interval of one phase.

    Args:
        epoch_number: 1-based epoch number used in the printed line.
        phase: Label inserted in the printed line (e.g. ``'forward'``).
        intervals: List of ``(start_time, end_time)`` pairs, one per batch.
        samples: Power samples handed unchanged to
            ``integrate_power_over_interval``.
    """
    for idx, (s_time, e_time) in enumerate(intervals):
        batch_energy = integrate_power_over_interval(samples, s_time, e_time)
        print(f"Epoch {epoch_number}, Batch {idx+1}, {phase} Energy: {batch_energy} J")


def train_func(net, train_iter, test_iter, alexlayer, num_epochs, lr, device):
    """Train ``net`` with SGD while a background thread samples GPU power.

    For every batch the wall-clock intervals of the host-to-device copy, the
    forward pass and the backward pass are recorded. At the end of each epoch
    the power samples collected so far are integrated over those intervals and
    the resulting per-batch energies are printed.

    Args:
        net: Model to train. Its ``nn.Linear`` / ``nn.Conv2d`` weights are
            re-initialised with Xavier-uniform before training.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable passed to ``d2l.evaluate_accuracy_gpu`` after each epoch.
        alexlayer: Layer names; only printed.
        num_epochs: Number of passes over ``train_iter``.
        lr: Learning rate for ``torch.optim.SGD``.
        device: Device the model and batches are moved to. The loop calls
            ``torch.cuda.synchronize()`` and NVML, so a CUDA device is expected.

    Returns:
        None. Results are reported through ``print`` only.
    """
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    print(f'The name of the layers are: {alexlayer}')
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    # Start NVML and the sampling thread. Both are released in ``finally``
    # blocks so that an exception during training cannot leave the
    # (non-daemon) sampler thread running or NVML initialised.
    nvmlInit()
    try:
        # NOTE: power is always read from GPU index 0, whatever `device` is.
        handle = nvmlDeviceGetHandleByIndex(0)
        power_data_queue = queue.Queue()
        stop_event = threading.Event()
        sampling_interval = 0.005
        sampler_thread = threading.Thread(
            target=nvml_sampling_thread,
            args=(handle, power_data_queue, stop_event, sampling_interval))
        sampler_thread.start()
        try:
            for epoch in range(num_epochs):
                print('The epoch is:', epoch+1)
                metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples
                to_device_intervals = []  # (start, end) of each batch's host-to-device copy
                forward_intervals = []  # (start, end) of each batch's forward pass
                backward_intervals = []  # (start, end) of each batch's backward pass

                net.train()
                for i, (X, y) in enumerate(train_iter):
                    print('The batch is:', i+1)
                    optimizer.zero_grad()

                    # Host-to-device copy, bracketed by timestamps.
                    start_ttd_time = time.time()
                    X, y = X.to(device), y.to(device)
                    torch.cuda.synchronize()
                    end_ttd_time = time.time()
                    to_device_intervals.append((start_ttd_time, end_ttd_time))

                    # Fixed pause between the copy and the forward pass
                    # (presumably to separate the two phases in the power trace).
                    time.sleep(0.5)

                    # Forward pass; synchronize so the timestamp covers the GPU work.
                    start_forward_time = time.time()
                    y_hat = net(X)
                    torch.cuda.synchronize()
                    end_forward_time = time.time()
                    forward_intervals.append((start_forward_time, end_forward_time))

                    # The loss is computed outside both timed intervals.
                    l = loss_fn(y_hat, y)
                    torch.cuda.synchronize()

                    # Backward pass.
                    start_backward_time = time.time()
                    l.backward()
                    torch.cuda.synchronize()
                    end_backward_time = time.time()
                    backward_intervals.append((start_backward_time, end_backward_time))
                    optimizer.step()

                    with torch.no_grad():
                        metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])

                # Guard against an empty train_iter (no examples accumulated).
                train_acc = metric[1] / metric[2] if metric[2] else float('nan')

                test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
                print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')

                # Take every sample collected so far. The queue is FIFO, so
                # popping only "current size minus size at epoch start" items
                # would return the oldest entries and drop the newest ones.
                # Samples older than this epoch are harmless: integration
                # selects samples by timestamp.
                epoch_samples = _drain_power_queue(power_data_queue)

                _report_phase_energy(epoch+1, 'to_device', to_device_intervals, epoch_samples)
                _report_phase_energy(epoch+1, 'forward', forward_intervals, epoch_samples)
                _report_phase_energy(epoch+1, 'backward', backward_intervals, epoch_samples)
        finally:
            # Always stop the sampler, even when training raised.
            stop_event.set()
            sampler_thread.join()
    finally:
        nvmlShutdown()

In [15]:
# # 使用示例
# if __name__ == "__main__":
#     # 初始化 NVML
#     nvmlInit()
#     handle = nvmlDeviceGetHandleByIndex(0)

#     # 创建一个队列来存储功率数据
#     power_data_queue = queue.Queue()

#     # 创建一个线程停止事件
#     stop_event = threading.Event()

#     # Start the sampling thread; it fetches power data every 5 ms (sampling_interval = 0.005 s)
#     sampling_interval = 0.005
#     sampler_thread = threading.Thread(target=nvml_sampling_thread, args=(handle, power_data_queue, stop_event, sampling_interval))
#     sampler_thread.start()

#     # 模拟训练过程
#     # 在真实使用中,这里是你的训练主循环
#     print("Training started...")
#     for epoch in range(3):
#         print(f"Epoch {epoch+1}...")
#         time.sleep(2)  # 模拟训练耗时
#     print("Training finished.")

#     # 训练结束后，通知线程停止并等待其结束
#     stop_event.set()
#     sampler_thread.join()

#     # 获取所有采样数据
#     sampled_data = []
#     while not power_data_queue.empty():
#         sampled_data.append(power_data_queue.get())

#     # 处理数据 (示例：打印前10条数据)
#     print("Sampled power data (first 10):", sampled_data[:10])

#     # 关闭 NVML
#     nvmlShutdown()

### Train the model

In [16]:
def train_model_f(main_folder, batch_size, num_epochs, round, lr, device):
    """Run one Fashion-MNIST training configuration unless its folder already exists.

    The configuration is identified by the sub-folder
    ``E{num_epochs}_B{batch_size}_R{round}`` of ``main_folder``. If that folder
    is present the call only prints a notice; otherwise the folder is created,
    the data is loaded at 224x224, the first three batch shapes and the batch
    count are printed, and ``train_func`` is invoked.

    Args:
        main_folder: ``pathlib.Path`` under which the run folder is created.
        batch_size: Batch size passed to ``load_data_fashion_mnist``.
        num_epochs: Number of epochs passed to ``train_func``.
        round: Zero-based repetition index (printed one-based).
        lr: Learning rate passed to ``train_func``.
        device: Device passed to ``train_func``.

    Returns:
        None. ``alexnet_f`` and ``alexlayer`` are read from the notebook globals.
    """
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    run_dir = main_folder / f'E{num_epochs}_B{batch_size}_R{round}'

    # An existing folder marks this configuration as already handled: skip it.
    if run_dir.exists():
        print("文件存在。")
        return

    os.makedirs(run_dir)
    print("文件不存在，已创建。")
    print("文件创建于：", run_dir)

    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)

    # Walk the training iterator once: show the first three batch shapes and
    # count how many batches it yields.
    num_batches = 0
    for batch_idx, (images, _) in enumerate(train_iter):
        if batch_idx < 3:
            print('the shape of the', batch_idx, 'batch of the train_iter is:', images.shape)
        num_batches += 1
    print(f'The number of batches is: {(num_batches,)}')

    # train_func returns nothing, so no timing/accuracy arrays are written to
    # run_dir here (an earlier revision saved Layers_time, Train_part_time,
    # Train_time and Epoch_time as .npy files).
    train_func(alexnet_f, train_iter, test_iter, alexlayer, num_epochs, lr, device)

In [17]:
'''
def train_model_f(main_folder, batch_size, num_epochs, round, lr, device):
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # 判断文件是否存在
    if epoch_batch_folder.exists():
        print("文件存在。")
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
        train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
        # show the shape of the data
        list_of_i = []
        for i, (X, y) in enumerate(train_iter):
            if i < 3:
                print('the shape of the', i, 'batch of the train_iter is:', X.shape)
            else:
                pass
            list_of_i.append(i)
        print(f'The number of batches is: {np.array(list_of_i).shape}')
        Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, \
            Epoch_time, Epoch_energy = train_func(alexnet_f, train_iter, test_iter, alexlayer, num_epochs, lr, device)
        # save the data
        np.save(epoch_batch_folder/'Layers_time.npy', Layers_time)
        np.save(epoch_batch_folder/'Train_part_time.npy', Train_part_time)
        np.save(epoch_batch_folder/'Train_time.npy', Train_time)
        np.save(epoch_batch_folder/'Train_acc.npy', Train_acc)
        np.save(epoch_batch_folder/'Test_acc.npy', Test_acc)
        np.save(epoch_batch_folder/'Epoch_time.npy', Epoch_time)
        np.save(epoch_batch_folder/'Epoch_energy.npy', Epoch_energy)
'''

'\ndef train_model_f(main_folder, batch_size, num_epochs, round, lr, device):\n    print(f\'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running\')\n    # create the folder to store the data\n    epoch_batch_folder = main_folder/f\'E{num_epochs}_B{batch_size}_R{round}\'\n    # 判断文件是否存在\n    if epoch_batch_folder.exists():\n        print("文件存在。")\n    else:\n        os.makedirs(epoch_batch_folder)\n        print("文件不存在，已创建。")\n        print("文件创建于：", epoch_batch_folder)\n        train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)\n        # show the shape of the data\n        list_of_i = []\n        for i, (X, y) in enumerate(train_iter):\n            if i < 3:\n                print(\'the shape of the\', i, \'batch of the train_iter is:\', X.shape)\n            else:\n                pass\n            list_of_i.append(i)\n        print(f\'The number of batches is: {np.array(list_of_i).shape}\')\n        Layers_time, Train_part_time, 

In [18]:
'''
def train_model_c(main_folder, batch_size, num_epochs, round, lr, device):
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # 判断文件是否存在
    if epoch_batch_folder.exists():
        print("文件存在。")
        pass
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
        train_iter, test_iter = load_data_cifar100(batch_size, resize=224)
        # show the shape of the data
        list_of_i = []
        for i, (X, y) in enumerate(train_iter):
            if i < 3:
                print('the shape of the', i, 'batch of the train_iter is:', X.shape)
            else:
                pass
            list_of_i.append(i)
        print(f'The number of batches is: {np.array(list_of_i).shape}')
        Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, \
            Epoch_time, Epoch_energy = train_func(alexnet_c, train_iter, test_iter, alexlayer, num_epochs, lr, device)
        # save the data
        np.save(epoch_batch_folder/'Layers_time.npy', Layers_time)
        np.save(epoch_batch_folder/'Train_part_time.npy', Train_part_time)
        np.save(epoch_batch_folder/'Train_time.npy', Train_time)
        np.save(epoch_batch_folder/'Train_acc.npy', Train_acc)
        np.save(epoch_batch_folder/'Test_acc.npy', Test_acc)
        np.save(epoch_batch_folder/'Epoch_time.npy', Epoch_time)
        np.save(epoch_batch_folder/'Epoch_energy.npy', Epoch_energy)
'''

'\ndef train_model_c(main_folder, batch_size, num_epochs, round, lr, device):\n    print(f\'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running\')\n    # create the folder to store the data\n    epoch_batch_folder = main_folder/f\'E{num_epochs}_B{batch_size}_R{round}\'\n    # 判断文件是否存在\n    if epoch_batch_folder.exists():\n        print("文件存在。")\n        pass\n    else:\n        os.makedirs(epoch_batch_folder)\n        print("文件不存在，已创建。")\n        print("文件创建于：", epoch_batch_folder)\n        train_iter, test_iter = load_data_cifar100(batch_size, resize=224)\n        # show the shape of the data\n        list_of_i = []\n        for i, (X, y) in enumerate(train_iter):\n            if i < 3:\n                print(\'the shape of the\', i, \'batch of the train_iter is:\', X.shape)\n            else:\n                pass\n            list_of_i.append(i)\n        print(f\'The number of batches is: {np.array(list_of_i).shape}\')\n        Layers_time, Train_pa

In [19]:
'''
def train_model_c10(main_folder, batch_size, num_epochs, round, lr, device):
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # 判断文件是否存在
    if epoch_batch_folder.exists():
        print("文件存在。")
        pass
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
        train_iter, test_iter = load_data_cifar10(batch_size, resize=224)
        # show the shape of the data
        list_of_i = []
        for i, (X, y) in enumerate(train_iter):
            if i < 3:
                print('the shape of the', i, 'batch of the train_iter is:', X.shape)
            else:
                pass
            list_of_i.append(i)
        print(f'The number of batches is: {np.array(list_of_i).shape}')
        Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, \
            Epoch_time, Epoch_energy = train_func(alexnet_c, train_iter, test_iter, alexlayer, num_epochs, lr, device)
        # save the data
        np.save(epoch_batch_folder/'Layers_time.npy', Layers_time)
        np.save(epoch_batch_folder/'Train_part_time.npy', Train_part_time)
        np.save(epoch_batch_folder/'Train_time.npy', Train_time)
        np.save(epoch_batch_folder/'Train_acc.npy', Train_acc)
        np.save(epoch_batch_folder/'Test_acc.npy', Test_acc)
        np.save(epoch_batch_folder/'Epoch_time.npy', Epoch_time)
        np.save(epoch_batch_folder/'Epoch_energy.npy', Epoch_energy)
'''

'\ndef train_model_c10(main_folder, batch_size, num_epochs, round, lr, device):\n    print(f\'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running\')\n    # create the folder to store the data\n    epoch_batch_folder = main_folder/f\'E{num_epochs}_B{batch_size}_R{round}\'\n    # 判断文件是否存在\n    if epoch_batch_folder.exists():\n        print("文件存在。")\n        pass\n    else:\n        os.makedirs(epoch_batch_folder)\n        print("文件不存在，已创建。")\n        print("文件创建于：", epoch_batch_folder)\n        train_iter, test_iter = load_data_cifar10(batch_size, resize=224)\n        # show the shape of the data\n        list_of_i = []\n        for i, (X, y) in enumerate(train_iter):\n            if i < 3:\n                print(\'the shape of the\', i, \'batch of the train_iter is:\', X.shape)\n            else:\n                pass\n            list_of_i.append(i)\n        print(f\'The number of batches is: {np.array(list_of_i).shape}\')\n        Layers_time, Train_p

In [20]:
# Learning rate handed to the training calls in the cells below.
lr = 0.01
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('The device is:', device)

The device is: cuda


In [21]:
# Root folder that receives one sub-folder per (epochs, batch size, run) combination.
main_folder = data_path
print('The folder is:', main_folder)
# Create the root folder on first use; otherwise just report that it is there.
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)
# Sweep every configuration. `epochs`, `batch_size` and `rounds` come from an
# earlier cell (assumed: two iterables and an int — confirm there).
# The innermost loop variable is deliberately NOT named `round`: at notebook
# top level that would rebind the built-in round() for every later cell.
for epoch in epochs:
    for batch in batch_size:
        for run_idx in range(rounds):
            train_model_f(main_folder, batch, epoch, run_idx, lr, device)

The folder is: /home/GreenAI/3080/AlexNet_test_data
文件存在。
The epoch is set: 1, batch is set: 256, is in 1th running
文件不存在，已创建。
文件创建于： /home/GreenAI/3080/AlexNet_test_data/E1_B256_R0


the shape of the 0 batch of the train_iter is: torch.Size([256, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([256, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([256, 1, 224, 224])
The number of batches is: (235,)
training on cuda
The name of the layers are: ['Conv1', 'ReLU2', 'MaxP3', 'Conv4', 'ReLU5', 'MaxP6', 'Conv7', 'ReLU8', 'Conv9', 'ReLU10', 'Conv11', 'ReLU12', 'MaxP13', 'Adap14', 'Flat15', 'Line16', 'ReLU17', 'Drop18', 'Line19', 'ReLU20', 'Drop21', 'Line22']
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batch is: 23
The batch is: 24
The batch is: 25
The batch is: 26
The batch is: 27
The batch 

In [22]:
'''
# create the folder to store the data
main_folder = data_path/'cifar100'
print('The folder is:', main_folder)
# find out that if the folder exists in the data path
# 判断文件是否存在
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)
for epoch in epochs:
    for batch in batch_size:
        for round in range(rounds):
            train_model_c(main_folder, batch, epoch, round, lr, device)
'''

'\n# create the folder to store the data\nmain_folder = data_path/\'cifar100\'\nprint(\'The folder is:\', main_folder)\n# find out that if the folder exists in the data path\n# 判断文件是否存在\nif main_folder.exists():\n    print("文件存在。")\nelse:\n    os.makedirs(main_folder)\n    print("文件不存在，已创建。")\n    print("文件创建于：", main_folder)\nfor epoch in epochs:\n    for batch in batch_size:\n        for round in range(rounds):\n            train_model_c(main_folder, batch, epoch, round, lr, device)\n'

In [23]:
'''
# create the folder to store the data
main_folder = data_path/'cifar10'
print('The folder is:', main_folder)
# find out that if the folder exists in the data path
# 判断文件是否存在
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)
for epoch in epochs:
    for batch in batch_size:
        for round in range(rounds):
            train_model_c10(main_folder, batch, epoch, round, lr, device)
'''

'\n# create the folder to store the data\nmain_folder = data_path/\'cifar10\'\nprint(\'The folder is:\', main_folder)\n# find out that if the folder exists in the data path\n# 判断文件是否存在\nif main_folder.exists():\n    print("文件存在。")\nelse:\n    os.makedirs(main_folder)\n    print("文件不存在，已创建。")\n    print("文件创建于：", main_folder)\nfor epoch in epochs:\n    for batch in batch_size:\n        for round in range(rounds):\n            train_model_c10(main_folder, batch, epoch, round, lr, device)\n'