In [33]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from torch.nn import functional as F
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from pathlib import Path
import subprocess
import os
import time
import functools

### Path

In [34]:
# Locate the output directory relative to where the notebook is running.

# Directory the kernel was started in.
current_path = os.getcwd()
print('The current path is:', current_path)

# One level up from the working directory.
parent_path = Path(current_path).parent
print('The parent path is:', parent_path)

# Results for this model live under <parent>/Data/resnet.
data_path = parent_path / 'Data' / 'resnet'
print('The data path is:', data_path)

The current path is: /home/yj/FinalThesis/GreenAI/Cloud/3060_95W/code
The parent path is: /home/yj/FinalThesis/GreenAI/Cloud/3060_95W
The data path is: /home/yj/FinalThesis/GreenAI/Cloud/3060_95W/Data/resnet


### Model

In [35]:
class Residual(nn.Module):  #@save
    """Two-convolution residual unit with an optional 1x1 projection shortcut.

    Args:
        input_channels: channels of the incoming feature map.
        num_channels: channels produced by both 3x3 convolutions.
        use_1x1conv: when True, the shortcut goes through a 1x1 convolution
            (with the same stride as ``conv1``) instead of being the identity.
        strides: stride of the first 3x3 convolution and of the 1x1 shortcut.
    """

    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        # Main path: conv(3x3, stride) -> BN -> ReLU -> conv(3x3) -> BN.
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        # Shortcut path: projection only when requested, otherwise identity.
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels,
                      kernel_size=1, stride=strides)
            if use_1x1conv else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return ``relu(main_path(X) + shortcut(X))``."""
        shortcut = X if self.conv3 is None else self.conv3(X)
        out = self.conv1(X)
        out = F.relu(self.bn1(out))
        out = self.conv2(out)
        out = self.bn2(out)
        out += shortcut
        return F.relu(out)
    
    
def resnet(img_channel, num_labels):
    """Build the 18-layer-style ResNet used in this notebook.

    Args:
        img_channel: number of channels of the input images.
        num_labels: size of the final classification layer.

    Returns:
        An ``nn.Sequential`` of: stem, four residual stages (two ``Residual``
        units each), global average pooling, flatten and a linear classifier.
    """

    def resnet_block(input_channels, num_channels, num_residuals,
                     first_block=False):
        """Return the list of ``Residual`` units that make up one stage."""
        units = []
        for idx in range(num_residuals):
            # Every stage except the first halves the resolution in its first unit.
            downsample = (idx == 0) and not first_block
            if downsample:
                units.append(Residual(input_channels, num_channels,
                                      use_1x1conv=True, strides=2))
            else:
                units.append(Residual(num_channels, num_channels))
        return units

    # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
    stem = nn.Sequential(
        nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
    )

    # (input channels, output channels, is first stage)
    stage_specs = [
        (64, 64, True),
        (64, 128, False),
        (128, 256, False),
        (256, 512, False),
    ]
    stages = [
        nn.Sequential(*resnet_block(c_in, c_out, 2, first_block=is_first))
        for c_in, c_out, is_first in stage_specs
    ]

    net = nn.Sequential(stem, *stages,
                        nn.AdaptiveAvgPool2d((1, 1)),
                        nn.Flatten(),
                        nn.Linear(512, num_labels))
    return net

In [37]:
# Enumerate the distinct module types in the network and count its top-level blocks.
# `layer_name` (sorted, unique class names) is the row index used for per-layer timing,
# `layer_type` is its length and `block_num` is the number of top-level Sequential blocks.
net = resnet(1, 10)
# print(net)
layer_name = []
block_num = 0
for layer in net:
    name = layer.__class__.__name__
    if name == 'Sequential':
        block_num += 1
        for block in layer:
            block_name = block.__class__.__name__
            # Keep the block's own name: the training loop times each Residual as one unit
            # and looks it up in `layer_name`.
            layer_name.append(block_name)
            if block_name == 'Residual':
                # A Residual is a plain nn.Module and cannot be iterated directly;
                # children() yields its registered sub-modules (convolutions, batch norms).
                layer_name.extend(child.__class__.__name__
                                  for child in block.children())
    else:
        layer_name.append(name)
# find the unique layer names and fix their order
layer_name = sorted(set(layer_name))
print('The layer name is:', layer_name)
# number of distinct module types found above
layer_type = len(layer_name)
print('The number of layers is:', layer_type)
print('The number of blocks is:', block_num)

The layer name is: ['AdaptiveAvgPool2d', 'BatchNorm2d', 'Conv2d', 'Flatten', 'Linear', 'MaxPool2d', 'ReLU', 'Residual']
The number of layers is: 8
The number of blocks is: 5


In [30]:
# Collect the class names of the top-level modules and of the direct members of
# each top-level Sequential, and count those Sequential blocks.
net = resnet(1, 10)
# print(net)
seen_names = set()
block_num = 0
for top_module in net:
    top_name = type(top_module).__name__
    if top_name == 'Sequential':
        block_num += 1
        member_names = [type(member).__name__ for member in top_module]
        # An empty Sequential contributes its own name, otherwise only its members do.
        seen_names.update(member_names or [top_name])
    else:
        seen_names.add(top_name)
# unique names in a fixed (alphabetical) order
layer_name = sorted(seen_names)
print('The layer name is:', layer_name)
# number of distinct module types collected above
layer_type = len(layer_name)
print('The number of layers is:', layer_type)
print('The number of blocks is:', block_num)

The layer name is: ['AdaptiveAvgPool2d', 'BatchNorm2d', 'Conv2d', 'Flatten', 'Linear', 'MaxPool2d', 'ReLU', 'Residual']
The number of layers is: 8
The number of blocks is: 5


Build a separate ResNet model for each dataset

In [12]:
# Each dataset needs its own img_channel and num_labels.
# Fashion-MNIST: 1 image channel, 10 classes.
resnet_f = resnet(img_channel=1, num_labels=10)
# CIFAR-100: 3 image channels, 100 classes.
resnet_c = resnet(img_channel=3, num_labels=100)

In [13]:
def _report_complexity(model, input_shape):
    """Run ptflops on ``model`` for one ``input_shape`` and print MACs and parameter count.

    Returns the ``(macs, params)`` strings produced by ``get_model_complexity_info``.
    """
    with torch.cuda.device(0):
        macs, params = get_model_complexity_info(
            model, input_shape, as_strings=True,
            print_per_layer_stat=True, verbose=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    return macs, params


# fashion mnist
macs_f, params_f = _report_complexity(resnet_f, (1, 224, 224))

print('*'*50)

# cifar100
macs_c, params_c = _report_complexity(resnet_c, (3, 224, 224))

Sequential(
  11.18 M, 100.000% Params, 1.74 GMac, 99.820% MACs, 
  (0): Sequential(
    3.33 k, 0.030% Params, 43.35 MMac, 2.482% MACs, 
    (0): Conv2d(3.2 k, 0.029% Params, 40.14 MMac, 2.298% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(128, 0.001% Params, 1.61 MMac, 0.092% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(0, 0.000% Params, 802.82 KMac, 0.046% MACs, )
    (3): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.046% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    148.22 k, 1.326% Params, 464.83 MMac, 26.611% MACs, 
    (0): Residual(
      74.11 k, 0.663% Params, 232.42 MMac, 13.306% MACs, 
      (conv1): Conv2d(36.93 k, 0.330% Params, 115.81 MMac, 6.630% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(36.93 k, 0.330% Params, 115.81 MMac, 6.630% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
 

### Datasets

In [15]:
def _trace_output_shapes(model, sample):
    """Feed ``sample`` through each top-level module of ``model``, printing every output shape.

    Returns the output of the last module.
    """
    for module in model:
        sample = module(sample)
        print(module.__class__.__name__, 'output shape:\t', sample.shape)
    return sample


# One random image per dataset at the 224x224 training resolution.
X_f = torch.randn(size=(1, 1, 224, 224), dtype=torch.float32)  # fashion mnist
X_c = torch.randn(size=(1, 3, 224, 224), dtype=torch.float32)  # cifar100

X_f = _trace_output_shapes(resnet_f, X_f)

print('*'*50)

X_c = _trace_output_shapes(resnet_c, X_c)

Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 128, 28, 28])
Sequential output shape:	 torch.Size([1, 256, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 512, 1, 1])
Flatten output shape:	 torch.Size([1, 512])
Linear output shape:	 torch.Size([1, 10])
**************************************************
Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 64, 56, 56])
Sequential output shape:	 torch.Size([1, 128, 28, 28])
Sequential output shape:	 torch.Size([1, 256, 14, 14])
Sequential output shape:	 torch.Size([1, 512, 7, 7])
AdaptiveAvgPool2d output shape:	 torch.Size([1, 512, 1, 1])
Flatten output shape:	 torch.Size([1, 512])
Linear output shape:	 torch.Size([1, 100])


In [16]:
# load the data
# fashion mnist
def get_dataloader_workers():
    """Return the number of worker processes each DataLoader uses to read data (4)."""
    num_workers = 4
    return num_workers

def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST (into ``../data``) and wrap it in data loaders.

    Args:
        batch_size: mini-batch size for both loaders.
        resize: optional target size passed to ``transforms.Resize``; applied
            before the conversion to a tensor.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    steps = []
    if resize:
        steps.append(transforms.Resize(resize))
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=pipeline, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=pipeline, download=True)

    train_loader = torch.utils.data.DataLoader(
        mnist_train, batch_size, shuffle=True,
        num_workers=get_dataloader_workers())
    test_loader = torch.utils.data.DataLoader(
        mnist_test, batch_size, shuffle=False,
        num_workers=get_dataloader_workers())
    return (train_loader, test_loader)

def load_data_cifar100(batch_size, resize=None):
    """Download CIFAR-100 (into ``../data``) and wrap it in data loaders.

    Args:
        batch_size: mini-batch size for both loaders.
        resize: optional target size passed to ``transforms.Resize``; applied
            before the conversion to a tensor.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    steps = []
    if resize:
        steps.append(transforms.Resize(resize))
    steps.append(transforms.ToTensor())
    pipeline = transforms.Compose(steps)

    # CIFAR-100 train / test splits
    cifar_train = torchvision.datasets.CIFAR100(
        root="../data", train=True, transform=pipeline, download=True)
    cifar_test = torchvision.datasets.CIFAR100(
        root="../data", train=False, transform=pipeline, download=True)

    train_loader = torch.utils.data.DataLoader(
        cifar_train, batch_size, shuffle=True,
        num_workers=get_dataloader_workers())
    test_loader = torch.utils.data.DataLoader(
        cifar_test, batch_size, shuffle=False,
        num_workers=get_dataloader_workers())
    return (train_loader, test_loader)

### Parameters

In [17]:
# Experiment grid: the run cells below train once per (epochs, batch size, round) combination.
batch_size = [64, 128, 256]  # mini-batch sizes to sweep
epochs = [10, 20, 30, 40, 50]  # number of training epochs for each run
rounds = 1  # how many times each (epochs, batch size) pair is repeated

### Train Model

In [21]:
def train_func(net, train_iter, test_iter, layer_type, block_num, num_epochs, lr, device):
    """Train ``net`` with SGD while recording timings and GPU power draw per epoch.

    The forward pass is executed module by module so that each module type and
    each top-level ``Sequential`` block can be timed separately. Module class
    names are mapped to rows through the notebook-level list ``layer_name``,
    which must already be defined and contain every class name encountered.

    While an epoch runs, ``nvidia-smi`` (must be on PATH) samples
    ``power.draw`` once per second into ``gpu_power_usage.csv`` in the current
    working directory; the file is overwritten every epoch.

    Args:
        net: ``nn.Sequential`` model; re-initialised in place before training.
        train_iter: iterable of ``(X, y)`` training batches.
        test_iter: iterable of test batches, passed to ``d2l.evaluate_accuracy_gpu``.
        layer_type: number of rows of the per-layer timing table.
        block_num: number of rows of the per-block timing table.
        num_epochs: number of epochs to train.
        lr: SGD learning rate.
        device: device to train on.

    Returns:
        Tuple ``(Layers_time, Train_part_time, Train_time, Train_acc, Test_acc,
        Epoch_time, Epoch_energy, Block_time)``:

        * ``Layers_time``: ``(layer_type, num_epochs)`` forward seconds per module type.
        * ``Train_part_time``: ``(4, num_epochs)``; rows are host-to-device copy,
          forward, backward and test-set evaluation seconds.
        * ``Train_time``: ``(num_epochs,)`` seconds spent in the training loop.
        * ``Train_acc`` / ``Test_acc``: ``(num_epochs,)`` accuracies.
        * ``Epoch_time``: ``(num_epochs,)`` seconds for training plus evaluation.
        * ``Epoch_energy``: ``(num_epochs, 1)`` object array; each cell holds the
          1-D float array of ``power.draw`` samples (watts) logged in that epoch.
        * ``Block_time``: ``(block_num, num_epochs)`` forward seconds per block.
    """
    use_cuda = torch.device(device).type == 'cuda'

    def sync():
        # CUDA kernels are launched asynchronously; wait for them so that the
        # wall-clock timers measure finished work. Skipped on CPU, where
        # torch.cuda.synchronize() would raise.
        if use_cuda:
            torch.cuda.synchronize()

    def init_weights(m):
        # The same model object is reused for several configurations, so restore
        # every learnable/buffered state first: biases, BatchNorm affine
        # parameters and running statistics would otherwise carry over from the
        # previous run. Conv/Linear weights then get Xavier initialisation.
        if isinstance(m, (nn.Linear, nn.Conv2d)):
            m.reset_parameters()
            nn.init.xavier_uniform_(m.weight)
        elif isinstance(m, nn.BatchNorm2d):
            m.reset_parameters()

    def run_timed(module, inputs, layer_epoch):
        # Run one module, wait for the GPU, and add the elapsed time to the row
        # of that module's class.
        layer_index = layer_name.index(module.__class__.__name__)
        layer_timer.start()
        outputs = module(inputs)
        sync()
        layer_epoch[layer_index] += layer_timer.stop()
        return outputs

    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    Block_time = np.zeros((block_num, num_epochs))  # each row is a block, each column is an epoch
    Layers_time = np.zeros((layer_type, num_epochs))  # each row is a layer type, each column is an epoch
    Train_part_time = np.zeros((4, num_epochs))  # to-device, forward, backward and test time of each epoch
    Train_time = np.zeros(num_epochs)  # training-loop time of each epoch
    Train_acc = np.zeros(num_epochs)  # training accuracy of each epoch
    Test_acc = np.zeros(num_epochs)  # test accuracy of each epoch
    Epoch_time = np.zeros(num_epochs)  # total time of each epoch
    Epoch_energy = np.zeros((num_epochs, 1), dtype='object')  # power samples of each epoch
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    block_timer = d2l.Timer()
    timer = d2l.Timer()
    train_timer = d2l.Timer()
    ttd_timer = d2l.Timer()
    forward_timer = d2l.Timer()
    backward_timer = d2l.Timer()
    layer_timer = d2l.Timer()
    test_timer = d2l.Timer()
    # start training
    for epoch in range(num_epochs):
        print('The epoch is:', epoch+1)
        timer.start()
        net.train()
        ttd_epoch, forward_epoch, backward_epoch = 0.0, 0.0, 0.0
        layer_epoch = np.zeros((layer_type, 1))  # per-layer-type forward time within this epoch
        metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples
        with open('gpu_power_usage.csv', 'w') as file:
            # The child keeps its own handle to the file, so it continues to
            # write after this ``with`` block closes ours. stderr is discarded
            # because nothing reads it.
            nvidia_smi_process = subprocess.Popen(
                ["nvidia-smi", "--query-gpu=power.draw", "--format=csv", "--loop-ms=1000"],
                stdout=file,
                stderr=subprocess.DEVNULL,
                text=True)
        try:
            train_timer.start()
            for i, (X, y) in enumerate(train_iter):
                batch_block_num = 0
                print('The batch is:', i+1)
                optimizer.zero_grad()
                # host -> device copy
                sync()
                ttd_timer.start()
                X, y = X.to(device), y.to(device)
                sync()
                ttd_epoch += ttd_timer.stop()
                # forward, one module at a time
                forward_timer.start()
                y_hat = X
                for layer in net:
                    if layer.__class__.__name__ == 'Sequential':
                        batch_block_num += 1
                        block_timer.start()
                        for sublayer in layer:
                            y_hat = run_timed(sublayer, y_hat, layer_epoch)
                        Block_time[batch_block_num-1, epoch] += block_timer.stop()
                    else:
                        y_hat = run_timed(layer, y_hat, layer_epoch)
                forward_epoch += forward_timer.stop()
                # loss
                l = loss_fn(y_hat, y)
                # backward: synchronise before stopping the timer, otherwise only
                # the time to enqueue the kernels would be measured
                sync()
                backward_timer.start()
                l.backward()
                sync()
                backward_epoch += backward_timer.stop()
                # optimize
                optimizer.step()
                sync()
                with torch.no_grad():
                    metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            train_epoch = train_timer.stop()
            # An empty loader leaves the example count at zero.
            train_acc = metric[1] / metric[2] if metric[2] else float('nan')
            test_timer.start()
            test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
            testtime_epoch = test_timer.stop()
            # Stop the epoch timer before reporting so the cumulative time
            # printed below includes the epoch that just finished.
            Epoch_time[epoch] = timer.stop()
        finally:
            # Always stop the power logger, even if training failed (e.g. CUDA
            # out of memory), and wait for it so its output is complete and no
            # orphaned nvidia-smi process is left behind.
            nvidia_smi_process.terminate()
            nvidia_smi_process.wait()
        print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
        print('epoch %d, time %f sec' % (epoch+1, timer.sum()))
        print(f'The total time of the {epoch} is:', Epoch_time[epoch])
        # store the time and accuracy data
        Layers_time[:, epoch] = layer_epoch.flatten()
        Train_part_time[:, epoch] = ttd_epoch, forward_epoch, backward_epoch, testtime_epoch
        Train_time[epoch] = train_epoch
        Train_acc[epoch] = train_acc
        Test_acc[epoch] = test_acc
        # Parse the logged samples: strip the " W" unit and convert to float.
        GPU_df = pd.read_csv('gpu_power_usage.csv')
        power_samples = GPU_df.iloc[:, 0].astype(str).str.replace(' W', '', regex=False)
        Epoch_energy[epoch, 0] = power_samples.astype(float).values
    return Layers_time, Train_part_time, Train_time, Train_acc, Test_acc, Epoch_time, Epoch_energy, Block_time

### Train the model

In [23]:
def train_model_f(main_folder, batch_size, num_epochs, round, lr, device):
    """Train the Fashion-MNIST ResNet for one configuration and save the measurements.

    Results are written as ``.npy`` files to
    ``main_folder/E{num_epochs}_B{batch_size}_R{round}``; existing files in that
    folder are overwritten.

    Args:
        main_folder: ``pathlib.Path`` of the dataset's output directory.
        batch_size: mini-batch size.
        num_epochs: number of training epochs.
        round: zero-based repetition index (only used for naming and logging).
        lr: learning rate.
        device: device passed on to ``train_func``.
    """
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # check whether the folder already exists
    if epoch_batch_folder.exists():
        print("文件存在。")
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
    # Show the shape of the first three batches only; reading the whole
    # (resized) training set just to count batches is a full extra pass.
    for i, (X, y) in enumerate(train_iter):
        if i >= 3:
            break
        print('the shape of the', i, 'batch of the train_iter is:', X.shape)
    # len() of a DataLoader is its number of batches; printed as a 1-tuple to
    # keep the log format unchanged.
    print(f'The number of batches is: {(len(train_iter),)}')
    results = train_func(resnet_f, train_iter, test_iter, layer_type, block_num, num_epochs, lr, device)
    # save the data, one file per array, in the order train_func returns them
    result_names = ('Layers_time', 'Train_part_time', 'Train_time', 'Train_acc',
                    'Test_acc', 'Epoch_time', 'Epoch_energy', 'Block_time')
    for result_name, values in zip(result_names, results):
        np.save(epoch_batch_folder/f'{result_name}.npy', values)

In [24]:
def train_model_c(main_folder, batch_size, num_epochs, round, lr, device):
    """Train the CIFAR-100 ResNet for one configuration and save the measurements.

    Results are written as ``.npy`` files to
    ``main_folder/E{num_epochs}_B{batch_size}_R{round}``. A configuration is
    skipped only when its results are already complete, i.e. the file written
    last (``Block_time.npy``) exists. A folder left behind by a run that
    crashed before saving is trained again instead of being silently skipped.

    Args:
        main_folder: ``pathlib.Path`` of the dataset's output directory.
        batch_size: mini-batch size.
        num_epochs: number of training epochs.
        round: zero-based repetition index (only used for naming and logging).
        lr: learning rate.
        device: device passed on to ``train_func``.
    """
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    # check whether the folder already exists
    if epoch_batch_folder.exists():
        print("文件存在。")
        if (epoch_batch_folder/'Block_time.npy').exists():
            print('Results already saved, skipping:', epoch_batch_folder)
            return
    else:
        os.makedirs(epoch_batch_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", epoch_batch_folder)
    train_iter, test_iter = load_data_cifar100(batch_size, resize=224)
    # Show the shape of the first three batches only; reading the whole
    # (resized) training set just to count batches is a full extra pass.
    for i, (X, y) in enumerate(train_iter):
        if i >= 3:
            break
        print('the shape of the', i, 'batch of the train_iter is:', X.shape)
    # len() of a DataLoader is its number of batches; printed as a 1-tuple to
    # keep the log format unchanged.
    print(f'The number of batches is: {(len(train_iter),)}')
    results = train_func(resnet_c, train_iter, test_iter, layer_type, block_num, num_epochs, lr, device)
    # save the data, one file per array, in the order train_func returns them;
    # Block_time is written last and doubles as the "run finished" marker
    result_names = ('Layers_time', 'Train_part_time', 'Train_time', 'Train_acc',
                    'Test_acc', 'Epoch_time', 'Epoch_energy', 'Block_time')
    for result_name, values in zip(result_names, results):
        np.save(epoch_batch_folder/f'{result_name}.npy', values)

In [25]:
# Learning rate and compute device shared by every training run.
lr = 0.01
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('The device is:', device)

The device is: cuda


In [26]:
# Run the full Fashion-MNIST experiment grid and store the results under data_path/fashion_mnist.
main_folder = data_path/'fashion_mnist'
print('The folder is:', main_folder)
# create the output folder if it does not exist yet
if main_folder.exists():
    print("文件存在。")
else:
    os.makedirs(main_folder)
    print("文件不存在，已创建。")
    print("文件创建于：", main_folder)
for epoch in epochs:
    for batch in batch_size:
        # `run_idx` rather than `round`: a top-level loop variable named `round`
        # would shadow the builtin round() for the rest of the kernel session.
        for run_idx in range(rounds):
            train_model_f(main_folder, batch, epoch, run_idx, lr, device)

The folder is: /home/yj/FinalThesis/GreenAI/Cloud/3060_95W/Data/resnet/fashion_mnist
文件不存在，已创建。
文件创建于： /home/yj/FinalThesis/GreenAI/Cloud/3060_95W/Data/resnet/fashion_mnist
The epoch is set: 10, batch is set: 64, is in 1th running
文件不存在，已创建。
文件创建于： /home/yj/FinalThesis/GreenAI/Cloud/3060_95W/Data/resnet/fashion_mnist/E10_B64_R0
the shape of the 0 batch of the train_iter is: torch.Size([64, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([64, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([64, 1, 224, 224])
The number of batches is: (938,)
training on cuda
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batc

OutOfMemoryError: CUDA out of memory. Tried to allocate 392.00 MiB. GPU 0 has a total capacty of 5.77 GiB of which 277.44 MiB is free. Including non-PyTorch memory, this process has 5.16 GiB memory in use. Of the allocated memory 4.75 GiB is allocated by PyTorch, and 257.06 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# # create the folder to store the data
# main_folder = data_path/'cifar100'
# print('The folder is:', main_folder)
# # find out that if the folder exists in the data path
# # 判断文件是否存在
# if main_folder.exists():
#     print("文件存在。")
# else:
#     os.makedirs(main_folder)
#     print("文件不存在，已创建。")
#     print("文件创建于：", main_folder)
# for epoch in epochs:
#     for batch in batch_size:
#         for round in range(rounds):
#             train_model_c(main_folder, batch, epoch, round, lr, device)