In [1]:
import os
import time

import numpy as np
import pandas as pd
import psutil
import torch
from d2l import torch as d2l
from ptflops import get_model_complexity_info
from torch import nn

from alexnet import AlexNet
from train import train_func

In [2]:
# Instantiate the AlexNet model class imported from the `alexnet` module; this
# single instance is reused by the profiling, shape-check and training cells.
alexnet = AlexNet()

##### Using ptflops to count the model's MACs (multiply–accumulate operations) and parameters

In [3]:
# Profile the model with ptflops for a single-channel 224x224 input.
# print_per_layer_stat=True prints the per-layer breakdown; as_strings=True
# makes the returned totals human-readable strings.
with torch.cuda.device(0):
    net = alexnet
    macs, params = get_model_complexity_info(
        net,
        (1, 224, 224),
        as_strings=True,
        print_per_layer_stat=True,
        verbose=True,
    )
    # Print both totals with the same left-aligned column layout.
    summary_rows = (
        ('Computational complexity: ', macs),
        ('Number of parameters: ', params),
    )
    for label, value in summary_rows:
        print('{:<30}  {:<8}'.format(label, value))
    # Keep the parameter-count string under a model-specific name.
    alexnet_para_num = params

Sequential(
  46.76 M, 100.000% Params, 939.85 MMac, 99.883% MACs, 
  (0): Conv2d(11.71 k, 0.025% Params, 34.15 MMac, 3.630% MACs, 1, 96, kernel_size=(11, 11), stride=(4, 4), padding=(1, 1))
  (1): ReLU(0, 0.000% Params, 279.94 KMac, 0.030% MACs, )
  (2): MaxPool2d(0, 0.000% Params, 279.94 KMac, 0.030% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(614.66 k, 1.314% Params, 415.51 MMac, 44.158% MACs, 96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(0, 0.000% Params, 173.06 KMac, 0.018% MACs, )
  (5): MaxPool2d(0, 0.000% Params, 173.06 KMac, 0.018% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(885.12 k, 1.893% Params, 127.46 MMac, 13.546% MACs, 256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(0, 0.000% Params, 55.3 KMac, 0.006% MACs, )
  (8): Conv2d(1.33 M, 2.839% Params, 191.16 MMac, 20.315% MACs, 384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): 

[**我们构造一个**]高度和宽度都为224的(**单通道数据，来观察每一层输出的形状**)。
它与 :numref:`fig_alexnet`中的AlexNet架构相匹配。


In [4]:
# Feed one random single-channel 224x224 image through the network, one layer
# at a time, and report the tensor shape produced by each layer.
X = torch.randn(1, 1, 224, 224)
for layer in net:
    X = layer(X)
    layer_name = type(layer).__name__
    print(layer_name, 'output shape:\t', X.shape)

Conv2d output shape:	 torch.Size([1, 96, 54, 54])
ReLU output shape:	 torch.Size([1, 96, 54, 54])
MaxPool2d output shape:	 torch.Size([1, 96, 26, 26])
Conv2d output shape:	 torch.Size([1, 256, 26, 26])
ReLU output shape:	 torch.Size([1, 256, 26, 26])
MaxPool2d output shape:	 torch.Size([1, 256, 12, 12])
Conv2d output shape:	 torch.Size([1, 384, 12, 12])
ReLU output shape:	 torch.Size([1, 384, 12, 12])
Conv2d output shape:	 torch.Size([1, 384, 12, 12])
ReLU output shape:	 torch.Size([1, 384, 12, 12])
Conv2d output shape:	 torch.Size([1, 256, 12, 12])
ReLU output shape:	 torch.Size([1, 256, 12, 12])
MaxPool2d output shape:	 torch.Size([1, 256, 5, 5])
Flatten output shape:	 torch.Size([1, 6400])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1, 4096])
ReLU output shape:	 torch.Size([1, 4096])
Dropout output shape:	 torch.Size([1, 4096])
Linear output shape:	 torch.Size([1,

## 读取数据集

尽管原文中AlexNet是在ImageNet上进行训练的，但本书在这里使用的是Fashion-MNIST数据集。因为即使在现代GPU上，训练ImageNet模型，同时使其收敛可能需要数小时或数天的时间。
将AlexNet直接应用于Fashion-MNIST的一个问题是，[**Fashion-MNIST图像的分辨率**]（$28 \times 28$像素）(**低于ImageNet图像。**)
为了解决这个问题，(**我们将它们增加到$224 \times 224$**)（通常来讲这不是一个明智的做法，但在这里这样做是为了有效使用AlexNet架构）。
这里需要使用`d2l.load_data_fashion_mnist`函数中的`resize`参数执行此调整。


In [5]:
batch_size = 128
# Fashion-MNIST images are resized to 224x224 to match the AlexNet input size.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=224)

# Number of mini-batches per epoch. len() on the loader gives this directly,
# so there is no need to load and resize the whole training set just to count.
num_batches = len(train_iter)
list_of_i = list(range(num_batches))  # batch indices, kept under the original name
print('the shape of the train_iter is:', (num_batches,))

# Print the shape of the first 10 batches. Zipping with range(10) stops the
# iteration after the 10th batch without fetching an extra one.
for i, (X, y) in zip(range(10), train_iter):
    print('the shape of the', i, 'batch of the train_iter is:', X.shape)

the shape of the train_iter is: (469,)
the shape of the 0 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 3 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 4 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 5 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 6 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 7 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 8 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 9 batch of the train_iter is: torch.Size([128, 1, 224, 224])


## [**训练AlexNet**]

现在AlexNet可以开始被训练了。与 :numref:`sec_lenet`中的LeNet相比，这里的主要变化是使用更小的学习速率训练，这是因为网络更深更广、图像分辨率更高，训练卷积神经网络就更昂贵。


In [6]:
# Training hyper-parameters: learning rate and number of epochs.
lr = 0.01
num_epochs = 20
# Device selected by d2l.try_gpu().
device = d2l.try_gpu()

# Run training; train_func returns seven result objects that the cells below
# inspect and save.
(
    timeenergy_data_forward,
    timeenergy_data_round,
    acc_data,
    train_l,
    train_acc,
    time_data_epoch,
    energy_data_epoch,
) = train_func(alexnet, train_iter, test_iter, num_epochs, lr, device)

training on cuda:0
epoch 1
round 0
time to device 0.004859 sec
time forward 0.345638 sec
loss time 0.026991 sec
backward time 0.263351 sec
optimizer time 0.011854 sec
training time in round 0 cost 0.7135848999023438 sec
loss 2.299846, train acc 0.117188
round 1
time to device 0.004083 sec
time forward 0.001173 sec
loss time 0.000094 sec
backward time 0.001205 sec
optimizer time 0.000222 sec
training time in round 1 cost 0.04630899429321289 sec
loss 2.300805, train acc 0.093750
round 2
time to device 0.004071 sec
time forward 0.001199 sec
loss time 0.000085 sec
backward time 0.001152 sec
optimizer time 0.000211 sec
training time in round 2 cost 0.04701519012451172 sec
loss 2.302157, train acc 0.093750
round 3
time to device 0.004131 sec
time forward 0.001491 sec
loss time 0.000091 sec
backward time 0.001246 sec
optimizer time 0.000212 sec
training time in round 3 cost 0.044675350189208984 sec
loss 2.301844, train acc 0.091797
round 4
time to device 0.004093 sec
time forward 0.001480 sec

KeyboardInterrupt: 

In [None]:
# Keep only index 0 along the third axis of the forward-pass measurements
# returned by train_func, then display the result.
# NOTE(review): index 0 is presumably the time component of a (time, energy)
# pair — confirm against train_func.
time_forward = timeenergy_data_forward[:,:,0]
time_forward

NameError: name 'timeenergy_data_forward' is not defined

In [None]:
# Keep only index 0 along the third axis of the per-round measurements
# returned by train_func, then display the result.
# NOTE(review): index 0 is presumably the time component of a (time, energy)
# pair — confirm against train_func.
time_round = timeenergy_data_round[:,:,0]
time_round

array([[1.89582515, 0.89699006, 0.07393169, 0.8366096 , 0.11032629,
        2.84555626],
       [1.89942431, 0.54779673, 0.05518675, 0.58306766, 0.09714484,
        2.87416577],
       [1.90760684, 0.55828738, 0.05451298, 0.60093498, 0.10437107,
        2.88428807],
       [1.90708113, 0.54389548, 0.04957867, 0.59477615, 0.10197997,
        2.87410426],
       [1.90321302, 0.55014229, 0.0509057 , 0.60187483, 0.10077024,
        2.91015196],
       [1.91447449, 0.56767845, 0.05369401, 0.60575104, 0.10498428,
        2.888592  ],
       [1.91307616, 0.56372738, 0.05298352, 0.60420418, 0.10338473,
        2.89238954],
       [1.90374565, 0.54683757, 0.04951191, 0.60788321, 0.10307193,
        2.89812613],
       [1.90572596, 0.56138587, 0.05364013, 0.60597825, 0.104774  ,
        2.88789153],
       [1.91457653, 0.57012939, 0.04994798, 0.60358262, 0.10379052,
        2.91053486],
       [1.91076541, 0.56449318, 0.05256701, 0.60598707, 0.10304236,
        2.89055467],
       [1.92141819, 0

In [None]:
# Bind the accuracy values returned by train_func to the name under which they
# are later saved (test_acc.npy), then display them.
test_acc = acc_data
test_acc

[0.7188,
 0.7845,
 0.8137,
 0.84,
 0.8486,
 0.8553,
 0.859,
 0.8722,
 0.8793,
 0.8786,
 0.8787,
 0.8808,
 0.8894,
 0.8828,
 0.8973,
 0.8948,
 0.8947,
 0.8992,
 0.9011,
 0.8884]

In [None]:
# Display the train_l values returned by train_func
# (presumably the training-loss history — TODO confirm against train_func).
train_l

[[2.3056108951568604,
  2.3040966987609863,
  2.3046716849009194,
  2.3026143312454224,
  2.3026888370513916,
  2.302878220876058,
  2.302434512547084,
  2.3025638461112976,
  2.3020624849531384,
  2.3021290779113768,
  2.301771402359009,
  2.301379064718882,
  2.3011758144085226,
  2.30123393876212,
  2.3009531180063885,
  2.3007819205522537,
  2.300571287379545,
  2.3003641896777682,
  2.3007899836490027,
  2.3009241342544557,
  2.3005974406287786,
  2.3003357973965732,
  2.300154209136963,
  2.300407220919927,
  2.3003690910339354,
  2.3003312899516177,
  2.299904125708121,
  2.2996896505355835,
  2.2994924742600014,
  2.299337116877238,
  2.2991368539871706,
  2.2989013716578484,
  2.29886236335292,
  2.2986360718222225,
  2.2985228061676026,
  2.2983693612946405,
  2.2982520606066728,
  2.2981289123233997,
  2.2979410061469445,
  2.297804242372513,
  2.2975519168667677,
  2.2972997086388722,
  2.297116246334342,
  2.2971660603176463,
  2.2968134032355416,
  2.296666725822117,
  2.

In [None]:
# Display the train_acc values returned by train_func
# (presumably the training-accuracy history — TODO confirm against train_func).
train_acc

[[0.125,
  0.1171875,
  0.10416666666666667,
  0.111328125,
  0.1046875,
  0.10416666666666667,
  0.10825892857142858,
  0.107421875,
  0.10677083333333333,
  0.10625,
  0.10866477272727272,
  0.10807291666666667,
  0.10817307692307693,
  0.10602678571428571,
  0.1078125,
  0.10791015625,
  0.10983455882352941,
  0.11024305555555555,
  0.10896381578947369,
  0.109765625,
  0.11086309523809523,
  0.11221590909090909,
  0.11243206521739131,
  0.109375,
  0.11,
  0.1084735576923077,
  0.1099537037037037,
  0.10993303571428571,
  0.11018318965517242,
  0.11223958333333334,
  0.11164314516129033,
  0.11376953125,
  0.11410984848484848,
  0.11511948529411764,
  0.115625,
  0.11675347222222222,
  0.11739864864864864,
  0.11842105263157894,
  0.11979166666666667,
  0.1205078125,
  0.12157012195121951,
  0.12276785714285714,
  0.12318313953488372,
  0.12357954545454546,
  0.12552083333333333,
  0.12567934782608695,
  0.12483377659574468,
  0.12483723958333333,
  0.12531887755102042,
  0.125,
  

In [None]:
# Build time_epoch from time_data_epoch: column 0 is copied unchanged, and
# column 1 is turned from a running value into row-to-row differences (the
# first row keeps its original value).
# NOTE(review): the columns look like (epoch number, cumulative seconds), which
# would make column 1 the per-epoch duration — confirm against train_func.
time_epoch = np.zeros((time_data_epoch.shape[0], time_data_epoch.shape[1]))
time_epoch[:, 0] = time_data_epoch[:, 0]
time_epoch[:, 1] = time_data_epoch[:, 1]
# np.diff returns one value fewer than its input, so it fills rows 1..end.
time_epoch[1:, 1] = np.diff(time_data_epoch[:, 1])
time_epoch

array([[ 1.        , 49.19019556],
       [ 2.        , 48.82736111],
       [ 3.        , 49.05926299],
       [ 4.        , 49.17564511],
       [ 5.        , 49.27827048],
       [ 6.        , 49.3755486 ],
       [ 7.        , 49.39823556],
       [ 8.        , 49.33143592],
       [ 9.        , 49.40453982],
       [10.        , 49.43203664],
       [11.        , 49.47626305],
       [12.        , 49.47726536],
       [13.        , 49.46333647],
       [14.        , 49.46429849],
       [15.        , 49.44848776],
       [16.        , 49.51685452],
       [17.        , 49.54309821],
       [18.        , 49.5515902 ],
       [19.        , 49.6170373 ],
       [20.        , 49.56146193]])

In [None]:
# Locate the project's data folder, which sits three directory levels above
# the notebook's current working directory. `os` is imported in the import
# cell at the top of the notebook.
working_dir = os.getcwd()

# Walk up one level at a time, printing each ancestor directory on the way.
ancestor_dirs = []
current_dir = working_dir
for _ in range(3):
    current_dir = os.path.dirname(current_dir)
    ancestor_dirs.append(current_dir)
    print(current_dir)
parent_dir1, parent_dir2, parent_dir3 = ancestor_dirs

data_folder = os.path.join(parent_dir3, 'data')
print(data_folder)

/home/yj/FinalThesis/GreenAI/Alexnet_linux/code/GPU_nvidia-smi
/home/yj/FinalThesis/GreenAI/Alexnet_linux/code
/home/yj/FinalThesis/GreenAI/Alexnet_linux
/home/yj/FinalThesis/GreenAI/Alexnet_linux/data


In [None]:
# find the second_part folder
second_part_dir = os.path.join(data_folder, 'epoch_20SGD_GPU')
second_part_dir

'/home/yj/FinalThesis/GreenAI/Alexnet_linux/data/epoch_20SGD_GPU'

In [None]:
# save the data as .npy file
np.save(os.path.join(second_part_dir, 'time_forward.npy'), time_forward)
np.save(os.path.join(second_part_dir, 'time_round.npy'), time_round)
np.save(os.path.join(second_part_dir, 'test_acc.npy'), test_acc)
np.save(os.path.join(second_part_dir, 'train_acc.npy'), train_acc)
np.save(os.path.join(second_part_dir, 'train_l.npy'), train_l)
np.save(os.path.join(second_part_dir, 'time_epoch.npy'), time_epoch)
np.save(os.path.join(second_part_dir, 'energy_epoch.npy'), energy_data_epoch)