In [1]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torch.nn import functional as F
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from pathlib import Path
import subprocess
import os
import time
import functools

In [2]:
torch.cuda.reset_peak_memory_stats()
torch.cuda.empty_cache()

### Path

In [3]:
'''find the Model path'''
# find the current path
current_path = os.getcwd()
print('The current path is:', current_path)

# find the parent path
parent_path = Path(current_path).parent
print('The parent path is:', parent_path)

# find the data path
data_path = parent_path / 'Data/mobilenet_v1'
print('The data path is:', data_path)

The current path is: /root/GreenAI/Cloud/4090/code
The parent path is: /root/GreenAI/Cloud/4090
The data path is: /root/GreenAI/Cloud/4090/Data/mobilenet_v1


### Model

In [4]:
class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(DepthwiseSeparableConv, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

class MobileNet(nn.Module):
    def __init__(self, input_channels, output_channels):
        super(MobileNet, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            DepthwiseSeparableConv(32, 64, 1),
            DepthwiseSeparableConv(64, 128, 2),
            DepthwiseSeparableConv(128, 128, 1),
            DepthwiseSeparableConv(128, 256, 2),
            DepthwiseSeparableConv(256, 256, 1),
            DepthwiseSeparableConv(256, 512, 2),

            # Typically, 5 Depthwise Separable Convolutions are repeated here, each with stride 1
            *[DepthwiseSeparableConv(512, 512, 1) for _ in range(5)],

            DepthwiseSeparableConv(512, 1024, 2),
            DepthwiseSeparableConv(1024, 1024, 1),

            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1024, output_channels)
        )

    def forward(self, x):
        return self.model(x)

In [5]:
net = MobileNet(1, 10)    
LayerName = []
block_num = 0
resblock_num = 0

for num, layer in net.named_children():  # 使用 named_children 来获取层名和层
    layername = layer.__class__.__name__
    print(f"{num}: {layername}")  # 打印层名和层类名
    if layer.__class__.__name__ == 'Sequential':
        block_num += 1
        for sublayernum, sublayer in layer.named_children():  # 再次使用 named_children
            sublayername = sublayer.__class__.__name__
            if sublayername == 'DepthwiseSeparableConv':
                resblock_num += 1
            print(f"  {sublayernum}: {sublayername}")
            layer_label = f'{layername[0]}{num}_{sublayername[0]}{sublayernum}'
            LayerName.append(layer_label)  # 收集子块的类型
    else:
        layer_label = f'{layername[0]}{num}'
        LayerName.append(layer_label)
            
print('The layer name is:', LayerName)
print(f'The length of layer name is: {len(LayerName)}')
print('The number of blocks is:', block_num)
print('The number of inception blocks is:', resblock_num)

model: Sequential
  0: Conv2d
  1: BatchNorm2d
  2: ReLU
  3: DepthwiseSeparableConv
  4: DepthwiseSeparableConv
  5: DepthwiseSeparableConv
  6: DepthwiseSeparableConv
  7: DepthwiseSeparableConv
  8: DepthwiseSeparableConv
  9: DepthwiseSeparableConv
  10: DepthwiseSeparableConv
  11: DepthwiseSeparableConv
  12: DepthwiseSeparableConv
  13: DepthwiseSeparableConv
  14: DepthwiseSeparableConv
  15: DepthwiseSeparableConv
  16: AdaptiveAvgPool2d
  17: Flatten
  18: Linear
The layer name is: ['Smodel_C0', 'Smodel_B1', 'Smodel_R2', 'Smodel_D3', 'Smodel_D4', 'Smodel_D5', 'Smodel_D6', 'Smodel_D7', 'Smodel_D8', 'Smodel_D9', 'Smodel_D10', 'Smodel_D11', 'Smodel_D12', 'Smodel_D13', 'Smodel_D14', 'Smodel_D15', 'Smodel_A16', 'Smodel_F17', 'Smodel_L18']
The length of layer name is: 19
The number of blocks is: 1
The number of inception blocks is: 13


build different alexnet model for different datasets

In [6]:
# 对于不同的数据集，要设置不同的img_channel和num_labels
# Fashion-MNIST中的图像通道数为1，类别数为10
vgg_f = MobileNet(1, 10)
# CIFAR100中的图像通道数为3，类别数为100
vgg_c = MobileNet(3, 100)
vgg_c10 = MobileNet(3, 10)

In [7]:
# fashion mnist
with torch.cuda.device(0):
    macs_f, params_f = get_model_complexity_info(vgg_f, (1, 224, 224), as_strings=True,
                                            print_per_layer_stat=True, verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs_f))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params_f))

print('*'*50)

# cifar100
with torch.cuda.device(0):
    macs_c, params_c = get_model_complexity_info(vgg_c, (3, 224, 224), as_strings=True,
                                            print_per_layer_stat=True, verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs_c))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params_c))
    
# cifar100
with torch.cuda.device(0):
    macs_c10, params_c10 = get_model_complexity_info(vgg_c10, (3, 224, 224), as_strings=True,
                                            print_per_layer_stat=True, verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs_c10))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params_c10))

MobileNet(
  3.21 M, 100.000% Params, 566.8 MMac, 99.920% MACs, 
  (model): Sequential(
    3.21 M, 100.000% Params, 566.8 MMac, 99.920% MACs, 
    (0): Conv2d(320, 0.010% Params, 4.01 MMac, 0.708% MACs, 1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(64, 0.002% Params, 802.82 KMac, 0.142% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(0, 0.000% Params, 401.41 KMac, 0.071% MACs, )
    (3): DepthwiseSeparableConv(
      2.43 k, 0.076% Params, 30.51 MMac, 5.378% MACs, 
      (depthwise): Conv2d(320, 0.010% Params, 4.01 MMac, 0.708% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
      (pointwise): Conv2d(2.11 k, 0.066% Params, 26.49 MMac, 4.670% MACs, 32, 64, kernel_size=(1, 1), stride=(1, 1))
    )
    (4): DepthwiseSeparableConv(
      8.96 k, 0.279% Params, 28.1 MMac, 4.953% MACs, 
      (depthwise): Conv2d(640, 0.020% Params, 2.01 MMac, 0.354% MACs, 64, 64, kernel_size=(3, 3), stride=(

### Datasets

In [8]:
X_f = torch.randn(size=(1, 1, 224, 224), dtype=torch.float32) # fashion mnist
X_c = torch.randn(size=(1, 3, 224, 224), dtype=torch.float32) # cifar100
X_c10 = torch.randn(size=(1, 3, 224, 224), dtype=torch.float32) # cifar100

# for layer in vgg_f:
#     X_f=layer(X_f)
#     print(layer.__class__.__name__,'output shape:\t',X_f.shape)

# print('*'*50)

# for layer in vgg_c:
#     X_c=layer(X_c)
#     print(layer.__class__.__name__,'output shape:\t',X_c.shape)

In [9]:
# load the data
# fashion mnist
def get_dataloader_workers():
    """Use 4 processes to read the data.

    Defined in :numref:`sec_utils`"""
    return 4

def load_data_fashion_mnist(batch_size, resize=None):
    """下载Fashion-MNIST数据集，然后将其加载到内存中

    Defined in :numref:`sec_fashion_mnist`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))

def load_data_cifar100(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_utils`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # import the cifar100 dataset
    cifar_train = torchvision.datasets.CIFAR100(
        root="../data", train=True, transform=trans, download=True)
    cifar_test = torchvision.datasets.CIFAR100(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,
                                        num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,
                                        num_workers=get_dataloader_workers()))
    
def load_data_cifar10(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_utils`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # import the cifar100 dataset
    cifar_train = torchvision.datasets.CIFAR10(
        root="../data", train=True, transform=trans, download=True)
    cifar_test = torchvision.datasets.CIFAR100(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,
                                        num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,
                                        num_workers=get_dataloader_workers()))

### Parameters

In [10]:
batch_size = [256]
epochs = [20]
rounds = 1

### Train Model

In [11]:

def train_func(net, train_iter, test_iter, LayerName, block_num, num_epochs, lr, device):
    def init_weights(m): # 初始化权重
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    # record each block running time
    # ResBlock_time = np.zeros((block_num, num_epochs)) # each row is a block, each column is an epoch
    Layers_time = np.zeros((len(LayerName), num_epochs)) # each row is a layer, each column is an epoch
    Train_part_time = np.zeros((4, num_epochs)) # store the time to device, forward and backward time, and test time of each epoch
    Train_time = np.zeros(num_epochs) # store the total training time of each epoch
    Train_acc = np.zeros(num_epochs) # store the training accuracy of each epoch
    Test_acc = np.zeros(num_epochs) # store the test accuracy of each epoch
    Epoch_time = np.zeros(num_epochs) # store the total time of each epoch
    Epoch_energy = np.zeros((num_epochs,1), dtype='object') # store the total energy of each epoch
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    # resblock_timer = d2l.Timer()
    timer = d2l.Timer()
    train_timer = d2l.Timer()
    ttd_timer = d2l.Timer()
    forward_timer = d2l.Timer()
    # loss_timer = d2l.Timer()
    backward_timer = d2l.Timer()
    # opt_timer = d2l.Timer()
    layer_timer = d2l.Timer()
    test_timer = d2l.Timer()
    # start training
    for epoch in range(num_epochs):
        print('The epoch is:', epoch+1)
        timer.start()
        net.train()
        train_epoch, ttd_epoch, forward_epoch, backward_epoch, testtime_epoch= 0.0, 0.0, 0.0, 0.0, 0.0
        layer_epoch = np.zeros((len(LayerName), 1)) # store the total running time of each layer in one epoch
        metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples   
        # start the nvidia-smi command
        with open('gpu_power_usage.csv', 'w') as file:
            # Start the nvidia-smi command
            nvidia_smi_process = subprocess.Popen(
                ["nvidia-smi", "--query-gpu=power.draw", "--format=csv", "--loop-ms=1000"],
                stdout=file,  # Redirect the output directly to the file
                stderr=subprocess.PIPE,
                text=True)
        train_timer.start()
        for i, (X, y) in enumerate(train_iter):
            # batch_block_num = 0
            # batch_resblock_num = 0
            print('The batch is:', i+1)
            optimizer.zero_grad()
            # to device
            torch.cuda.synchronize()  # 等待数据传输完成
            ttd_timer.start()
            X, y = X.to(device), y.to(device)
            torch.cuda.synchronize()  # 等待数据传输完成
            ttd_epoch += ttd_timer.stop()
            # forward
            forward_timer.start()
            y_hat = X
            for num, layer in net.named_children():
                layername = layer.__class__.__name__
                if layername == 'Sequential':
                    for sublayernum, sublayer in layer.named_children():
                        sublayername = sublayer.__class__.__name__
                        namestr = f'{layername[0]}{num}_{sublayername[0]}{sublayernum}'
                        namestr_index = LayerName.index(namestr)     
                        layer_timer.start()
                        y_hat = sublayer(y_hat)    
                        torch.cuda.synchronize()
                        layer_epoch[namestr_index] += layer_timer.stop()
                else:
                    namestr = f'{layername[0]}{num}'
                    layer_index = LayerName.index(namestr)
                    layer_timer.start()
                    y_hat = layer(y_hat)
                    torch.cuda.synchronize()
                    layer_epoch[layer_index] += layer_timer.stop()
            torch.cuda.synchronize()  
            forward_epoch += forward_timer.stop()
            # loss
            # loss_timer.start()
            l = loss_fn(y_hat, y)
            # backward
            torch.cuda.synchronize()  # 等待数据传输完成
            # loss_epoch += loss_timer.stop()
            backward_timer.start()
            l.backward()
            torch.cuda.synchronize()  # 等待数据传输完成
            backward_epoch += backward_timer.stop()
            # optimize
            # opt_timer.start()
            optimizer.step()
            torch.cuda.synchronize()  # 等待数据传输完成
            # opt_epoch += opt_timer.stop()
            with torch.no_grad():
                metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            train_acc = metric[1] / metric[2]
        train_epoch = train_timer.stop()
        test_timer.start()
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        testtime_epoch = test_timer.stop()
        print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
        print('epoch %d, time %f sec' % (epoch+1, timer.sum()))
        # store the time and acc data
        Epoch_time[epoch] = timer.stop()
        print(f'The total time of the {epoch} is:', Epoch_time[epoch])
        Layers_time[:, epoch] = layer_epoch.flatten()
        Train_part_time[:, epoch] = ttd_epoch, forward_epoch, backward_epoch, testtime_epoch
        print(ttd_epoch, forward_epoch, backward_epoch, testtime_epoch)
        print('*'*50)
        Train_time[epoch] = train_epoch
        Train_acc[epoch] = train_acc
        Test_acc[epoch] = test_acc
        # stop the nvidia-smi command
        nvidia_smi_process.terminate()
        # calculate the energy consumption of each epoch
        GPU_df = pd.read_csv('gpu_power_usage.csv')
        for row in range(len(GPU_df)):
            GPU_df.iloc[row,0] = GPU_df.iloc[row,0].replace(' W','')
        Consumption_df = GPU_df.astype(float)  
        EnergyDatai = Consumption_df.iloc[:,0].values # 将数据转换为numpy数组
        # store the energy data
        Epoch_energy[epoch,0] = EnergyDatai
    return Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, Epoch_time, Epoch_energy

### Train the model

In [12]:
def train_model_f(main_folder, batch_size, num_epochs, round, lr, device):
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # 判断文件是否存在
    if epoch_batch_folder.exists():
        print("文件存在。")
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
    # show the shape of the data
    list_of_i = []
    for i, (X, y) in enumerate(train_iter):
        if i < 3:
            print('the shape of the', i, 'batch of the train_iter is:', X.shape)
        else:
            pass
        list_of_i.append(i)
    print(f'The number of batches is: {np.array(list_of_i).shape}')
    Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, \
        Epoch_time, Epoch_energy = train_func(vgg_f, train_iter, test_iter, LayerName, block_num, num_epochs, lr, device)
    # save the data
    np.save(epoch_batch_folder/'Layers_time.npy', Layers_time)
    np.save(epoch_batch_folder/'Train_part_time.npy', Train_part_time)
    np.save(epoch_batch_folder/'Train_time.npy', Train_time)
    np.save(epoch_batch_folder/'Train_acc.npy', Train_acc)
    np.save(epoch_batch_folder/'Test_acc.npy', Test_acc)
    np.save(epoch_batch_folder/'Epoch_time.npy', Epoch_time)
    np.save(epoch_batch_folder/'Epoch_energy.npy', Epoch_energy)

In [13]:
def train_model_c(main_folder, batch_size, num_epochs, round, lr, device):
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # 判断文件是否存在
    if epoch_batch_folder.exists():
        print("文件存在。")
        pass
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
        train_iter, test_iter = load_data_cifar100(batch_size, resize=224)
        # show the shape of the data
        list_of_i = []
        for i, (X, y) in enumerate(train_iter):
            if i < 3:
                print('the shape of the', i, 'batch of the train_iter is:', X.shape)
            else:
                pass
            list_of_i.append(i)
        print(f'The number of batches is: {np.array(list_of_i).shape}')
        Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, \
            Epoch_time, Epoch_energy = train_func(vgg_c, train_iter, test_iter, LayerName, block_num, num_epochs, lr, device)
        # save the data
        np.save(epoch_batch_folder/'Layers_time.npy', Layers_time)
        np.save(epoch_batch_folder/'Train_part_time.npy', Train_part_time)
        np.save(epoch_batch_folder/'Train_time.npy', Train_time)
        np.save(epoch_batch_folder/'Train_acc.npy', Train_acc)
        np.save(epoch_batch_folder/'Test_acc.npy', Test_acc)
        np.save(epoch_batch_folder/'Epoch_time.npy', Epoch_time)
        np.save(epoch_batch_folder/'Epoch_energy.npy', Epoch_energy)

In [12]:
def train_model_c10(main_folder, batch_size, num_epochs, round, lr, device):
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # 判断文件是否存在
    if epoch_batch_folder.exists():
        print("文件存在。")
        pass
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
        train_iter, test_iter = load_data_cifar10(batch_size, resize=224)
        # show the shape of the data
        list_of_i = []
        for i, (X, y) in enumerate(train_iter):
            if i < 3:
                print('the shape of the', i, 'batch of the train_iter is:', X.shape)
            else:
                pass
            list_of_i.append(i)
        print(f'The number of batches is: {np.array(list_of_i).shape}')
        Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, \
            Epoch_time, Epoch_energy = train_func(vgg_c, train_iter, test_iter, LayerName, block_num, num_epochs, lr, device)
        # save the data
        np.save(epoch_batch_folder/'Layers_time.npy', Layers_time)
        np.save(epoch_batch_folder/'Train_part_time.npy', Train_part_time)
        np.save(epoch_batch_folder/'Train_time.npy', Train_time)
        np.save(epoch_batch_folder/'Train_acc.npy', Train_acc)
        np.save(epoch_batch_folder/'Test_acc.npy', Test_acc)
        np.save(epoch_batch_folder/'Epoch_time.npy', Epoch_time)
        np.save(epoch_batch_folder/'Epoch_energy.npy', Epoch_energy)

In [13]:
lr = 0.01
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('The device is:', device)

The device is: cuda


In [15]:
# create the folder to store the data
main_folder = data_path/'fashion_mnist'
print('The folder is:', main_folder)
# find out that if the folder exists in the data path
# 判断文件是否存在
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)
for epoch in epochs:
    for batch in batch_size:
        for round in range(rounds):
            train_model_f(main_folder, batch, epoch, round, lr, device)

The folder is: /root/GreenAI/Cloud/4090/Data/mobilenet_v1/fashion_mnist
文件不存在，已创建。
文件创建于： /root/GreenAI/Cloud/4090/Data/mobilenet_v1/fashion_mnist
The epoch is set: 20, batch is set: 256, is in 1th running
文件不存在，已创建。
文件创建于： /root/GreenAI/Cloud/4090/Data/mobilenet_v1/fashion_mnist/E20_B256_R0


the shape of the 0 batch of the train_iter is: torch.Size([256, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([256, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([256, 1, 224, 224])
The number of batches is: (235,)
training on cuda
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batch is: 23
The batch is: 24
The batch is: 25
The batch is: 26
The batch is: 27
The batch is: 28
The batch is: 29
The batch is: 30
The batch is: 31
The batch is: 32
The batch is: 33
The batch is: 34
The batch is: 35
The batch is: 36
The batch is: 37
The batch is: 38
The batch is: 39
The batch is: 40
The batch is: 41
The batch is

In [16]:
# create the folder to store the data
main_folder = data_path/'cifar100'
print('The folder is:', main_folder)
# find out that if the folder exists in the data path
# 判断文件是否存在
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)
for epoch in epochs:
    for batch in batch_size:
        for round in range(rounds):
            train_model_c(main_folder, batch, epoch, round, lr, device)

The folder is: /root/GreenAI/Cloud/4090/Data/mobilenet_v1/cifar100
文件不存在，已创建。
文件创建于： /root/GreenAI/Cloud/4090/Data/mobilenet_v1/cifar100
The epoch is set: 20, batch is set: 256, is in 1th running
文件不存在，已创建。
文件创建于： /root/GreenAI/Cloud/4090/Data/mobilenet_v1/cifar100/E20_B256_R0
Files already downloaded and verified
Files already downloaded and verified
the shape of the 0 batch of the train_iter is: torch.Size([256, 3, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([256, 3, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([256, 3, 224, 224])
The number of batches is: (196,)
training on cuda
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 2

train acc 0.010, test acc 0.010
epoch 1, time 0.000000 sec
The total time of the 0 is: 33.3683545589447
5.8109214305877686 4.915071487426758 9.510581970214844 5.159667491912842
**************************************************
The epoch is: 2
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batch is: 23
The batch is: 24
The batch is: 25
The batch is: 26
The batch is: 27
The batch is: 28
The batch is: 29
The batch is: 30
The batch is: 31
The batch is: 32
The batch is: 33
The batch is: 34
The batch is: 35
The batch is: 36
The batch is: 37
The batch is: 38
The batch is: 39
The batch is: 40
The batch is: 41
The batch is: 42
The batch is: 43
The batch is: 44
The batch is: 45


In [14]:
# create the folder to store the data
main_folder = data_path/'cifar10'
print('The folder is:', main_folder)
# find out that if the folder exists in the data path
# 判断文件是否存在
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)
for epoch in epochs:
    for batch in batch_size:
        for round in range(rounds):
            train_model_c10(main_folder, batch, epoch, round, lr, device)

The folder is: /root/GreenAI/Cloud/4090/Data/mobilenet_v1/cifar10
文件不存在，已创建。
文件创建于： /root/GreenAI/Cloud/4090/Data/mobilenet_v1/cifar10
The epoch is set: 20, batch is set: 256, is in 1th running
文件不存在，已创建。
文件创建于： /root/GreenAI/Cloud/4090/Data/mobilenet_v1/cifar10/E20_B256_R0
Files already downloaded and verified
Files already downloaded and verified
the shape of the 0 batch of the train_iter is: torch.Size([256, 3, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([256, 3, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([256, 3, 224, 224])
The number of batches is: (196,)
training on cuda
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
T