In [101]:
import torch
from torch import nn
import torchvision
import torchvision.transforms as transforms
from alexnet_FashionMnist import FashionMnist
from alexnet_CIFAR100 import CIFAR100
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from train_layers import train_layers
from train import train_func
import matplotlib.pyplot as plt

In [102]:
alexnet_fashionmnist = FashionMnist()
alexnet_cifar100 = CIFAR100()

##### using ptflops to calculate the number of the flops in the model

In [103]:
with torch.cuda.device(0):
    net = alexnet_fashionmnist
    macs, params = get_model_complexity_info(net, (1, 224, 224), as_strings=True,
                                            print_per_layer_stat=True, verbose=True)
    
    # net = alexnet_cifar100
    # macs, params = get_model_complexity_info(net, (3, 224, 224), as_strings=True,
    #                                         print_per_layer_stat=True, verbose=True)
    print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))

Sequential(
  46.76 M, 100.000% Params, 939.85 MMac, 99.883% MACs, 
  (0): Conv2d(11.71 k, 0.025% Params, 34.15 MMac, 3.630% MACs, 1, 96, kernel_size=(11, 11), stride=(4, 4), padding=(1, 1))
  (1): ReLU(0, 0.000% Params, 279.94 KMac, 0.030% MACs, )
  (2): MaxPool2d(0, 0.000% Params, 279.94 KMac, 0.030% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(614.66 k, 1.314% Params, 415.51 MMac, 44.158% MACs, 96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(0, 0.000% Params, 173.06 KMac, 0.018% MACs, )
  (5): MaxPool2d(0, 0.000% Params, 173.06 KMac, 0.018% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(885.12 k, 1.893% Params, 127.46 MMac, 13.546% MACs, 256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(0, 0.000% Params, 55.3 KMac, 0.006% MACs, )
  (8): Conv2d(1.33 M, 2.839% Params, 191.16 MMac, 20.315% MACs, 384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): 

[**我们构造一个**]高度和宽度都为224的(**单通道数据，来观察每一层输出的形状**)。
它与 :numref:`fig_alexnet`中的AlexNet架构相匹配。


In [104]:
X = torch.randn(1, 1, 224, 224) # FashionMNIST

# X = torch.randn(1, 3, 224, 224) # CIFAR100
for layer in net:
    X=layer(X)
    print(layer.__class__.__name__,'output shape:\t',X.shape)

Conv2d output shape:	 torch.Size([1, 96, 54, 54])
ReLU output shape:	 torch.Size([1, 96, 54, 54])
MaxPool2d output shape:	 torch.Size([1, 96, 26, 26])
Conv2d output shape:	 torch.Size([1, 256, 26, 26])
ReLU output shape:	 torch.Size([1, 256, 26, 26])
MaxPool2d output shape:	 torch.Size([1, 256, 12, 12])
Conv2d output shape:	 torch.Size([1, 384, 12, 12])
ReLU output shape:	 torch.Size([1, 384, 12, 12])
Conv2d output shape:	 torch.Size([1, 384, 12, 12])
ReLU output shape:	 torch.Size([1, 384, 12, 12])
Conv2d output shape:	 torch.Size([1, 256, 12, 12])
ReLU output shape:	 torch.Size([1, 256, 12, 12])
MaxPool2d output shape:	 torch.Size([1, 256, 5, 5])
Flatten output shape:	 torch.Size([1, 6400])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1,

## 读取数据集

尽管原文中AlexNet是在ImageNet上进行训练的，但本书在这里使用的是Fashion-MNIST数据集。因为即使在现代GPU上，训练ImageNet模型，同时使其收敛可能需要数小时或数天的时间。
将AlexNet直接应用于Fashion-MNIST的一个问题是，[**Fashion-MNIST图像的分辨率**]（$28 \times 28$像素）(**低于ImageNet图像。**)
为了解决这个问题，(**我们将它们增加到$224 \times 224$**)（通常来讲这不是一个明智的做法，但在这里这样做是为了有效使用AlexNet架构）。
这里需要使用`d2l.load_data_fashion_mnist`函数中的`resize`参数执行此调整。


In [105]:
def get_dataloader_workers():
    """Use 4 processes to read the data.

    Defined in :numref:`sec_utils`"""
    return 4
def load_data_cifar100(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_utils`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # import the cifar100 dataset
    cifar_train = torchvision.datasets.CIFAR100(
        root="../data", train=True, transform=trans, download=True)
    cifar_test = torchvision.datasets.CIFAR100(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,
                                        num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,
                                        num_workers=get_dataloader_workers()))

In [106]:
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224) # FashionMNIST

# train_iter, test_iter = load_data_cifar100(batch_size, resize=224) # CIFAR100
# print the shape of the train_iter
list_of_i = []
for i, (X, y) in enumerate(train_iter):
    list_of_i.append(i)

print('the shape of the train_iter is:', np.array(list_of_i).shape)
# print(list_of_i)
# print the first 10 batch of the train_iter
for i, (X, y) in enumerate(train_iter):
    if i < 10:
        print('the shape of the', i, 'batch of the train_iter is:', X.shape)
    else:
        break

the shape of the train_iter is: (469,)
the shape of the 0 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 3 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 4 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 5 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 6 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 7 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 8 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 9 batch of the train_iter is: torch.Size([128, 1, 224, 224])


## [**训练AlexNet**]

现在AlexNet可以开始被训练了。与 :numref:`sec_lenet`中的LeNet相比，这里的主要变化是使用更小的学习速率训练，这是因为网络更深更广、图像分辨率更高，训练卷积神经网络就更昂贵。


In [107]:
lr, num_epochs = 0.01, 10
device = d2l.try_gpu()
# Time_Layers, Time_AllEpochs, TestAcc, TrainLoss, TrainAcc, TimeEpoch, Energy_AllEpochs, TrainTime, Timport= train_layers(alexnet, train_iter, test_iter, num_epochs, lr, device)
Time_AllEpochs, TestAcc, TrainLoss, TrainAcc, TimeEpoch, Energy_AllEpochs, TrainTime, TTrainAccLoss = train_func(alexnet_fashionmnist, train_iter, test_iter, num_epochs, lr, device) # FashionMNIST
# Time_AllEpochs, TestAcc, TrainLoss, TrainAcc, TimeEpoch, Energy_AllEpochs, TrainTime, TTrainAccLoss = train_func(alexnet_cifar100, train_iter, test_iter, num_epochs, lr, device) # CIFAR100

training on cuda:0
epoch 1
round 0
time to device 0.005492 sec
round 1
time to device 0.004411 sec
round 2
time to device 0.004281 sec
round 3
time to device 0.004301 sec
round 4
time to device 0.004285 sec
round 5
time to device 0.004282 sec
round 6
time to device 0.004319 sec
round 7
time to device 0.004252 sec
round 8
time to device 0.004249 sec
round 9
time to device 0.004243 sec
round 10
time to device 0.004282 sec
round 11
time to device 0.004262 sec
round 12
time to device 0.004303 sec
round 13
time to device 0.004256 sec
round 14
time to device 0.004237 sec
round 15
time to device 0.004461 sec
round 16
time to device 0.004247 sec
round 17
time to device 0.004283 sec
round 18
time to device 0.004280 sec
round 19
time to device 0.004232 sec
round 20
time to device 0.004270 sec
round 21
time to device 0.004290 sec
round 22
time to device 0.004262 sec
round 23
time to device 0.004255 sec
round 24
time to device 0.004263 sec
round 25
time to device 0.004240 sec
round 26
time to devi

In [108]:
# # print('Forward Layers Time: \n', 
# #       'Conv2d time: ', Time_Layers[0,0], '\n',
# #       'ReLU time: ', Time_Layers[0,1], '\n',
# #       'MaxPool2d time: ', Time_Layers[0,2], '\n',
# #       'Linear time: ', Time_Layers[0,3], '\n',
# #       'Dropout time: ', Time_Layers[0,4], '\n',
# #       'Flatten time: ', Time_Layers[0,5])
# # print('*'*50)
print('Time_AllEpochs: \n', 
      'Time to Device time: ', Time_AllEpochs[0,0], '\n',
      'Forward time: ', Time_AllEpochs[0,1], '\n',
      'Calculate Loss time: ', Time_AllEpochs[0,2], '\n',
      'Backward time: ', Time_AllEpochs[0,3], '\n',
      'Optimize time: ', Time_AllEpochs[0,4], '\n',
      'Test time: ', Time_AllEpochs[0,5])
print('*'*50)
print('Train Time of each epoch:', TrainTime[0])
print('*'*50)
print('Evaluation time: ', TTrainAccLoss[0])
print('*'*50)
print('TestAcc:', TestAcc)
print('*'*50)
print('TrainLoss:', TrainLoss)
print('*'*50)
print('TrainAcc:', TrainAcc)
print('*'*50)
print('TimeEpoch:', TimeEpoch[0])
print('*'*50)
print('Energy_AllEpochs:', Energy_AllEpochs[0], '\n',
      'Total Energy:',np.sum(Energy_AllEpochs[0,0]), '\n',
      'The time of the first epoch:', len(Energy_AllEpochs[0,0]))

Time_AllEpochs: 
 Time to Device time:  [2.01870584] 
 Forward time:  [11.29799891] 
 Calculate Loss time:  [0.11765909] 
 Backward time:  [22.77441883] 
 Optimize time:  [0.99459362] 
 Test time:  [2.7939074]
**************************************************
Train Time of each epoch: 37.20421648025513
**************************************************
Evaluation time:  0.14483356475830078
**************************************************
TestAcc: [0.7326, 0.8048, 0.8339, 0.8416, 0.8476, 0.8643, 0.8676, 0.8698, 0.8756, 0.8844]
**************************************************
TrainLoss: [[1.328339028040568], [0.6396104882558187], [0.5269196425120036], [0.46434884045918784], [0.4260135863939921], [0.3961130197207133], [0.3722379963874817], [0.3535518149375916], [0.3407807276725769], [0.3267735411008199]]
**************************************************
TrainAcc: [[0.5070666666666667], [0.7602], [0.8037833333333333], [0.8292], [0.8443333333333334], [0.85615], [0.8643166666666666], [

In [3]:
import os
working_dir = os.getcwd()
print('The working dir is: ', working_dir)

# find out the parent directory
# train_data = os.path.join(working_dir, 'Alexnet_train_data')
train_data = os.path.join(working_dir, 'Resnet_train_data')
print('The train_data dir is: ', train_data)

# sub_folder = os.path.join(train_data, 'round10')
# print('The sub_folder dir is: ', sub_folder)

# find out the folders that in the train_data
folders = os.listdir(train_data)
print('The folders in the train_data are: ', folders)

The working dir is:  /Users/dtjgp/Learning/GreenAI/GPU/universal
The train_data dir is:  /Users/dtjgp/Learning/GreenAI/GPU/universal/Resnet_train_data
The folders in the train_data are:  ['round10', 'round4', 'round3', 'round2', 'round5', 'round7', 'round9', 'round8', 'round6', 'round1']


In [110]:
# save the Time_AllEpochs, TestAcc, TrainLoss, TrainAcc, TimeEpoch, Energy_AllEpochs, TrainTime, TTrainAccLoss to the sub_folder dir as .npy file
np.save(os.path.join(sub_folder, 'Time_AllEpochs.npy'), Time_AllEpochs)
np.save(os.path.join(sub_folder, 'TestAcc.npy'), TestAcc)
np.save(os.path.join(sub_folder, 'TrainLoss.npy'), TrainLoss)
np.save(os.path.join(sub_folder, 'TrainAcc.npy'), TrainAcc)
np.save(os.path.join(sub_folder, 'TimeEpoch.npy'), TimeEpoch)
np.save(os.path.join(sub_folder, 'Energy_AllEpochs.npy'), Energy_AllEpochs)
np.save(os.path.join(sub_folder, 'TrainTime.npy'), TrainTime)
np.save(os.path.join(sub_folder, 'TTrainAccLoss.npy'), TTrainAccLoss)