# This code is to gather the information of the energy consumption of the whole training process of different models.

## import the required libraries

In [1]:
import torch
from torch import nn
from torch.nn import functional as F
import torchvision
import torchvision.transforms as transforms
from d2l import torch as d2l
import numpy as np
import pandas as pd
from ptflops import get_model_complexity_info
from pathlib import Path
import os
import time
import pynvml
import threading
import queue

## find the path

In [2]:
'''find the Model path'''
# find the current path
from pathlib import Path

# find the current path
current_path = Path.cwd()
print('The current path is:', current_path)

# find the data path
data_path = Path(current_path / 'ModelsData')
print('The data path is:', data_path)

The current path is: /root/autodl-tmp/GreenAI/3080
The data path is: /root/autodl-tmp/GreenAI/3080/ModelsData


## Models

### generate the data paths

In [3]:
models_name = ['alexnet', 
               'vgg11', 'vgg13', 'vgg16', 
               'resnet18', 'resnet34', 'resnet50',
               'googlenet_origin', 'googlenet_mod1', 'googlenet_mod2', 'googlenet_mod3',
               'googlenet_mod4', 'googlenet_mod5', 'googlenet_mod6', 'googlenet_mod7', 
               'googlenet_mod8', 'googlenet_mod9',
               'mobilenetv1_path', 'mobilenetv2_path']

In [4]:
DataList = [Path(f"{data_path}/{i}") for i in models_name]
print(DataList)

[PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/alexnet'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/vgg11'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/vgg13'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/vgg16'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/resnet18'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/resnet34'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/resnet50'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_origin'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod1'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod2'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod3'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod4'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod5'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod6'), PosixPath('/root/autodl-tmp/GreenAI/3080/ModelsData/googlenet_mod7'),

### create the models

#### AlexNet Model

In [5]:
def alexnet(img_channel, num_labels):
    net = nn.Sequential(
        # 这里使用一个11*11的更大窗口来捕捉对象。
        # 同时，步幅为4，以减少输出的高度和宽度。
        # 另外，输出通道的数目远大于LeNet
        nn.Conv2d(img_channel, 64, kernel_size=11, stride=4, padding=2), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # 减小卷积窗口，使用填充为2来使得输入与输出的高和宽一致，且增大输出通道数
        nn.Conv2d(64, 192, kernel_size=5, padding=2), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        # 使用三个连续的卷积层和较小的卷积窗口。
        # 除了最后的卷积层，输出通道的数量进一步增加。
        # 在前两个卷积层之后，汇聚层不用于减少输入的高度和宽度
        nn.Conv2d(192, 384, kernel_size=3, padding=1), nn.ReLU(),
        nn.Conv2d(384, 256, kernel_size=3, padding=1), nn.ReLU(),
        nn.Conv2d(256, 256, kernel_size=3, padding=1), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2),
        nn.AdaptiveAvgPool2d((6, 6)),   # 使用全局平均池化对每个通道中所有元素求平均并直接将结果传递到全连接层
        nn.Flatten(),
        # 这里，全连接层的输出数量是LeNet中的好几倍。使用dropout层来减轻过拟合
        nn.Linear(256 * 6 * 6, 4096), nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096), nn.ReLU(),
        nn.Dropout(p=0.5),
        # 最后是输出层。由于这里使用Fashion-MNIST，所以用类别数为10，而非论文中的1000
        nn.Linear(4096, num_labels))
    return net

#### VGG Models

##### VGG11 Model

In [6]:
def vgg11_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

def vgg11(input_channels, output_channels):
    conv_arch = [(1, 64), (1, 128), (2, 256), (2, 512), (2, 512)]
    conv_arch = [(1, 64), (1, 128), (2, 256), (2, 512), (2, 512)]
    in_channels = input_channels  # For RGB images
    # Create convolutional layers
    conv_layers = []
    for num_convs, out_channels in conv_arch:
        conv_layers.append(vgg11_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    return nn.Sequential(
        *conv_layers, nn.Flatten(),
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, output_channels)  # Output layer for 1000 classes
    )

##### VGG13 Model

In [7]:
def vgg13_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

def vgg13(input_channels, output_channels):
    # VGG-13 architecture
    conv_arch = [(2, 64), (2, 128), (2, 256), (2, 512), (2, 512)]
    in_channels = input_channels  # For RGB images
    # Create convolutional layers
    conv_layers = []
    for num_convs, out_channels in conv_arch:
        conv_layers.append(vgg13_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    return nn.Sequential(
        *conv_layers, nn.Flatten(),
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, output_channels)  # Output layer for 1000 classes
    )

##### VGG16 Model

In [8]:
def vgg16_block(num_convs, in_channels, out_channels):
    layers = []
    for _ in range(num_convs):
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        in_channels = out_channels
    layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*layers)

def vgg16(input_channels, output_channels):
    conv_arch = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]
    in_channels = input_channels  # For RGB images
    # Create convolutional layers
    conv_layers = []
    for num_convs, out_channels in conv_arch:
        conv_layers.append(vgg16_block(num_convs, in_channels, out_channels))
        in_channels = out_channels

    return nn.Sequential(
        *conv_layers, nn.Flatten(),
        nn.Linear(out_channels * 7 * 7, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, 4096), nn.ReLU(), nn.Dropout(0.5),
        nn.Linear(4096, output_channels)  # Output layer for 1000 classes
    )

#### ResNet Models

##### ResNet18 Model

In [4]:
class Residual18(nn.Module):  #@save
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)
    
    
def resnet18(img_channel, num_labels):
    # blk = Residual(3,6, use_1x1conv=True, strides=2)

    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                    nn.BatchNorm2d(64), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    def resnet_block(input_channels, num_channels, num_residuals,
                    first_block=False):
        blk = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.append(Residual18(input_channels, num_channels,
                                    use_1x1conv=True, strides=2))
            else:
                blk.append(Residual18(num_channels, num_channels))
        return blk

    b2 = nn.Sequential(*resnet_block(64, 64, 2, first_block=True))
    b3 = nn.Sequential(*resnet_block(64, 128, 2))
    b4 = nn.Sequential(*resnet_block(128, 256, 2))
    b5 = nn.Sequential(*resnet_block(256, 512, 2))

    net = nn.Sequential(b1, b2, b3, b4, b5,
                        nn.AdaptiveAvgPool2d((1,1)),
                        nn.Flatten(), nn.Linear(512, num_labels))
    return net

NameError: name 'nn' is not defined

##### ResNet34 Model

In [10]:
class Residual34(nn.Module):  #@save
    def __init__(self, input_channels, num_channels,
                 use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels,
                               kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.Conv2d(num_channels, num_channels,
                               kernel_size=3, padding=1)
        if use_1x1conv:
            self.conv3 = nn.Conv2d(input_channels, num_channels,
                                   kernel_size=1, stride=strides)
        else:
            self.conv3 = None
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3:
            X = self.conv3(X)
        Y += X
        return F.relu(Y)
    
    
def resnet34(img_channel, num_labels):
    # blk = Residual(3,6, use_1x1conv=True, strides=2)

    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                    nn.BatchNorm2d(64), nn.ReLU(),
                    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

    def resnet_block(input_channels, num_channels, num_residuals,
                    first_block=False):
        blk = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.append(Residual34(input_channels, num_channels,
                                    use_1x1conv=True, strides=2))
            else:
                blk.append(Residual34(num_channels, num_channels))
        return blk

    b2 = nn.Sequential(*resnet_block(64, 64, 3, first_block=True))
    b3 = nn.Sequential(*resnet_block(64, 128, 4))
    b4 = nn.Sequential(*resnet_block(128, 256, 6))
    b5 = nn.Sequential(*resnet_block(256, 512, 3))

    net = nn.Sequential(b1, b2, b3, b4, b5,
                        nn.AdaptiveAvgPool2d((1,1)),
                        nn.Flatten(), nn.Linear(512, num_labels))
    return net

##### ResNet50 Model

In [11]:
class Residual50(nn.Module):
    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        self.conv1 = nn.Conv2d(input_channels, num_channels , kernel_size=1, stride=strides, bias=False)
        self.bn1 = nn.BatchNorm2d(num_channels )
        self.conv2 = nn.Conv2d(num_channels , num_channels , kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(num_channels )
        self.conv3 = nn.Conv2d(num_channels , num_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(num_channels)
        
        if use_1x1conv or strides != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides, bias=False),
                nn.BatchNorm2d(num_channels)
            )
        else:
            self.downsample = None

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = F.relu(self.bn2(self.conv2(Y)))
        Y = self.bn3(self.conv3(Y))
        if self.downsample:
            X = self.downsample(X)
        Y += X
        return F.relu(Y)

def resnet50(img_channel, num_labels):
    b1 = nn.Sequential(
        nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
        nn.BatchNorm2d(64), nn.ReLU(),
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    )

    def resnet_block(input_channels, num_channels, num_residuals, first_block=False):
        blk = []
        for i in range(num_residuals):
            if i == 0 and not first_block:
                blk.append(Residual50(input_channels, num_channels, use_1x1conv=True, strides=2))
            else:
                blk.append(Residual50(num_channels, num_channels))
        return blk

    b2 = nn.Sequential(*resnet_block(64, 64, 3, first_block=True))
    b3 = nn.Sequential(*resnet_block(64, 128, 4))
    b4 = nn.Sequential(*resnet_block(128, 256, 6))
    b5 = nn.Sequential(*resnet_block(256, 512, 3))

    net = nn.Sequential(
        b1, b2, b3, b4, b5,
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(), nn.Linear(512, num_labels)
    )
    return net

#### GoogleNet Models

##### GoogleNet Model(orinigal)

In [12]:
class Inception(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # 在通道维度上连结输出
        return torch.cat((p1, p2, p3, p4), dim=1)
    
def Googlenet(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024, num_labels))
    return net

##### GoogleNet modified version1 Model

In [13]:
class Inception_mod1(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception_mod1, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        # self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        # p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # 在通道维度上连结输出
        return torch.cat((p2, p3, p4), dim=1)
    
def Googlenet_mod1(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod1(192, 64, (96, 128), (16, 32), 32),
                   Inception_mod1(256-64, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod1(480-128, 192, (96, 208), (16, 48), 64),
                   Inception_mod1(512-192, 160, (112, 224), (24, 64), 64),
                   Inception_mod1(512-160, 128, (128, 256), (24, 64), 64),
                   Inception_mod1(512-128, 112, (144, 288), (32, 64), 64),
                   Inception_mod1(528-112, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod1(832-256, 256, (160, 320), (32, 128), 128),
                   Inception_mod1(832-256, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024-384, num_labels))
    return net

##### GoogleNet modified version2 Model

In [14]:
class Inception_mod2(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception_mod2, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        # p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # 在通道维度上连结输出
        return torch.cat((p1, p3, p4), dim=1)
    
def Googlenet_mod2(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod2(192, 64, (96, 128), (16, 32), 32),
                   Inception_mod2(256-128, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod2(480-192, 192, (96, 208), (16, 48), 64),
                   Inception_mod2(512-208, 160, (112, 224), (24, 64), 64),
                   Inception_mod2(512-224, 128, (128, 256), (24, 64), 64),
                   Inception_mod2(512-256, 112, (144, 288), (32, 64), 64),
                   Inception_mod2(528-288, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod2(832-320, 256, (160, 320), (32, 128), 128),
                   Inception_mod2(832-320, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024-384, num_labels))
    return net

##### GoogleNet modified version3 Model

In [15]:
class Inception_mod3(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception_mod3, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        # self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        # self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        # p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(self.p4_1(x)))
        # 在通道维度上连结输出
        return torch.cat((p1, p2, p4), dim=1)
    
def Googlenet_mod3(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod3(192, 64, (96, 128), (16, 32), 32),
                   Inception_mod3(256-32, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod3(480-96, 192, (96, 208), (16, 48), 64),
                   Inception_mod3(512-48, 160, (112, 224), (24, 64), 64),
                   Inception_mod3(512-64, 128, (128, 256), (24, 64), 64),
                   Inception_mod3(512-64, 112, (144, 288), (32, 64), 64),
                   Inception_mod3(528-64, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod3(832-128, 256, (160, 320), (32, 128), 128),
                   Inception_mod3(832-128, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024-128, num_labels))
    return net

##### GoogleNet modified version4 Model

In [16]:
class Inception_mod4(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception_mod4, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        # self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        # self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        # p4 = F.relu(self.p4_2(self.p4_1(x)))
        # 在通道维度上连结输出
        return torch.cat((p1, p2, p3), dim=1)
    
def Googlenet_mod4(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod4(192, 64, (96, 128), (16, 32), 32),
                   Inception_mod4(256-32, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod4(480-64, 192, (96, 208), (16, 48), 64),
                   Inception_mod4(512-64, 160, (112, 224), (24, 64), 64),
                   Inception_mod4(512-64, 128, (128, 256), (24, 64), 64),
                   Inception_mod4(512-64, 112, (144, 288), (32, 64), 64),
                   Inception_mod4(528-64, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod4(832-128, 256, (160, 320), (32, 128), 128),
                   Inception_mod4(832-128, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024-128, num_labels))
    return net

##### GoogleNet modified version5 Model

In [17]:
class Inception_mod5(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c2, **kwargs):
        super(Inception_mod5, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        # self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        # self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        # self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        # self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        # self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        # p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        # p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        # p4 = F.relu(self.p4_2(self.p4_1(x)))
        # 在通道维度上连结输出
        return torch.cat([p2], dim=1)
    
def Googlenet_mod5(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod5(192, (96, 128)),
                   Inception_mod5(128, (128, 192)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod5(192, (96, 208)),
                   Inception_mod5(208, (112, 224)),
                   Inception_mod5(224, (128, 256)),
                   Inception_mod5(256, (144, 288)),
                   Inception_mod5(288, (160, 320)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod5(320, (160, 320)),
                   Inception_mod5(320, (192, 384)),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(384, num_labels))
    return net

##### GoogleNet modified version6 Model

In [18]:
class Inception_mod6(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c2, **kwargs):
        super(Inception_mod6, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        # self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        self.p3_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        # self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        # self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        # p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        # p4 = F.relu(self.p4_2(self.p4_1(x)))
        # 在通道维度上连结输出
        return torch.cat((p2, p3), dim=1)
    
def Googlenet_mod6(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod6(192, (96, 128)),
                   Inception_mod6(128*2, (128, 192)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod6(192*2, (96, 208)),
                   Inception_mod6(208*2, (112, 224)),
                   Inception_mod6(224*2, (128, 256)),
                   Inception_mod6(256*2, (144, 288)),
                   Inception_mod6(288*2, (160, 320)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod6(320*2, (160, 320)),
                   Inception_mod6(320*2, (192, 384)),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(384*2, num_labels))
    return net

##### GoogleNet modified version7 Model

In [19]:
class Inception_mod7(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c2, **kwargs):
        super(Inception_mod7, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        # self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        self.p3_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        self.p4_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p4_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)

    def forward(self, x):
        # p1 = F.relu(self.p1_1(x))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(F.relu(self.p4_1(x))))
        # 在通道维度上连结输出
        return torch.cat((p2, p3, p4), dim=1)
    
def Googlenet_mod7(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod7(192, (96, 128)),
                   Inception_mod7(128*3, (128, 192)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod7(192*3, (96, 208)),
                   Inception_mod7(208*3, (112, 224)),
                   Inception_mod7(224*3, (128, 256)),
                   Inception_mod7(256*3, (144, 288)),
                   Inception_mod7(288*3, (160, 320)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod7(320*3, (160, 320)),
                   Inception_mod7(320*3, (192, 384)),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(384*3, num_labels))
    return net

##### GoogleNet modified version8 Model

In [20]:
class Inception_mod8(nn.Module):
    # c1--c4是每条路径的输出通道数
    def __init__(self, in_channels, c2, **kwargs):
        super(Inception_mod8, self).__init__(**kwargs)
        # 线路1，单1x1卷积层
        self.p1_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p1_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路2，1x1卷积层后接3x3卷积层
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路3，1x1卷积层后接5x5卷积层
        self.p3_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # 线路4，3x3最大汇聚层后接1x1卷积层
        self.p4_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p4_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)

    def forward(self, x):
        p1 = F.relu(self.p1_2(F.relu(self.p1_1(x))))
        p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
        p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
        p4 = F.relu(self.p4_2(F.relu(self.p4_1(x))))
        # 在通道维度上连结输出
        return torch.cat((p1, p2, p3, p4), dim=1)
    
def Googlenet_mod8(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b3 = nn.Sequential(Inception_mod8(192, (96, 128)),
                   Inception_mod8(128*4, (128, 192)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b4 = nn.Sequential(Inception_mod8(192*4, (96, 208)),
                   Inception_mod8(208*4, (112, 224)),
                   Inception_mod8(224*4, (128, 256)),
                   Inception_mod8(256*4, (144, 288)),
                   Inception_mod8(288*4, (160, 320)),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b5 = nn.Sequential(Inception_mod8(320*4, (160, 320)),
                   Inception_mod8(320*4, (192, 384)),
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(384*4, num_labels))
    return net

##### GoogleNet modified version9 Model

In [21]:
# class Inception_mod8(nn.Module):
#     # c1--c4是每条路径的输出通道数
#     def __init__(self, in_channels, c2, **kwargs):
#         super(Inception_mod8, self).__init__(**kwargs)
#         # 线路1，单1x1卷积层
#         self.p1_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
#         self.p1_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
#         # 线路2，1x1卷积层后接3x3卷积层
#         self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
#         self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
#         # 线路3，1x1卷积层后接5x5卷积层
#         self.p3_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
#         self.p3_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
#         # 线路4，3x3最大汇聚层后接1x1卷积层
#         self.p4_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
#         self.p4_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)

#     def forward(self, x):
#         p1 = F.relu(self.p1_2(F.relu(self.p1_1(x))))
#         p2 = F.relu(self.p2_2(F.relu(self.p2_1(x))))
#         p3 = F.relu(self.p3_2(F.relu(self.p3_1(x))))
#         p4 = F.relu(self.p4_2(F.relu(self.p4_1(x))))
#         # 在通道维度上连结输出
#         return torch.cat((p1, p2, p3, p4), dim=1)
    
def Googlenet_mod9(img_channel, num_labels):
    b1 = nn.Sequential(nn.Conv2d(img_channel, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1),
                   nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.ReLU(),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    # b3 = nn.Sequential(Inception_mod8(192, (96, 128)),
    #                Inception_mod8(128*4, (128, 192)),
    #                nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    # b4 = nn.Sequential(Inception_mod8(192*4, (96, 208)),
    #                Inception_mod8(208*4, (112, 224)),
    #                Inception_mod8(224*4, (128, 256)),
    #                Inception_mod8(256*4, (144, 288)),
    #                Inception_mod8(288*4, (160, 320)),
    #                nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    
    # b5 = nn.Sequential(Inception_mod8(320*4, (160, 320)),
    #                Inception_mod8(320*4, (192, 384)),
    #                nn.AdaptiveAvgPool2d((1,1)),
    #                nn.Flatten())
    
    b5 = nn.Sequential(
                   nn.AdaptiveAvgPool2d((1,1)),
                   nn.Flatten())

    net = nn.Sequential(b1, b2, b5, nn.Linear(192, num_labels))
    return net

#### MobileNet Models

##### MobileNetV1 Model

In [22]:
class DepthwiseSeparableConv(nn.Module):
    def __init__(self, in_channels, out_channels, stride):
        super(DepthwiseSeparableConv, self).__init__()
        self.depthwise = nn.Conv2d(in_channels, in_channels, kernel_size=3, stride=stride, padding=1, groups=in_channels)
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        x = self.depthwise(x)
        x = self.pointwise(x)
        return x

class MobileNet(nn.Module):
    def __init__(self, input_channels, output_channels):
        super(MobileNet, self).__init__()
        self.model = nn.Sequential(
            nn.Conv2d(input_channels, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),

            DepthwiseSeparableConv(32, 64, 1),
            DepthwiseSeparableConv(64, 128, 2),
            DepthwiseSeparableConv(128, 128, 1),
            DepthwiseSeparableConv(128, 256, 2),
            DepthwiseSeparableConv(256, 256, 1),
            DepthwiseSeparableConv(256, 512, 2),

            # Typically, 5 Depthwise Separable Convolutions are repeated here, each with stride 1
            *[DepthwiseSeparableConv(512, 512, 1) for _ in range(5)],

            DepthwiseSeparableConv(512, 1024, 2),
            DepthwiseSeparableConv(1024, 1024, 1),

            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1024, output_channels)
        )

    def forward(self, x):
        return self.model(x)

##### MobileNetV2 Model

In [23]:
class InvertedResidual(nn.Module):
    def __init__(self, in_channels, out_channels, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        hidden_dim = in_channels * expand_ratio
        self.use_residual = self.stride == 1 and in_channels == out_channels

        self.layers = nn.Sequential(
            # expand
            nn.Conv2d(in_channels, hidden_dim, 1, 1, 0, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),

            # depthwise
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),

            # project
            nn.Conv2d(hidden_dim, out_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_channels)
        )

    def forward(self, x):
        if self.use_residual:
            return x + self.layers(x)
        else:
            return self.layers(x)

class MobileNetV2(nn.Module):
    def __init__(self, input_channels, output_channels):
        super(MobileNetV2, self).__init__()
        self.first_layer = nn.Sequential(
            nn.Conv2d(input_channels, 32, 3, 2, 1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU6(inplace=True)
        )

        self.inverted_residual_blocks = nn.Sequential(
            InvertedResidual(32, 16, 1, 1),
            InvertedResidual(16, 24, 2, 6),
            InvertedResidual(24, 24, 1, 6),
            InvertedResidual(24, 32, 2, 6),
            InvertedResidual(32, 32, 1, 6),
            InvertedResidual(32, 32, 1, 6),
            InvertedResidual(32, 64, 2, 6),
            InvertedResidual(64, 64, 1, 6),
            InvertedResidual(64, 64, 1, 6),
            InvertedResidual(64, 64, 1, 6),
            InvertedResidual(64, 96, 1, 6),
            InvertedResidual(96, 96, 1, 6),
            InvertedResidual(96, 96, 1, 6),
            InvertedResidual(96, 160, 2, 6),
            InvertedResidual(160, 160, 1, 6),
            InvertedResidual(160, 160, 1, 6),
            InvertedResidual(160, 320, 1, 6)
        )

        self.last_layers = nn.Sequential(
            nn.Conv2d(320, 1280, 1, 1, 0, bias=False),
            nn.BatchNorm2d(1280),
            nn.ReLU6(inplace=True),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1280, output_channels)
        )

    def forward(self, x):
        x = self.first_layer(x)
        x = self.inverted_residual_blocks(x)
        x = self.last_layers(x)
        return x

## Call the Models

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'mps')
print('The device is:', device)

# check if mps on macbook is availabel
# print(torch.backends.mps.is_available())  # 检查 MPS 是否可用
# print(torch.backends.mps.is_built())      # 检查 MPS 是否已编译

The device is: mps


In [None]:
# create list to store all the parameters and the number of MACs, be careful of the different datasets
# to avoid the error of the number of input channels and any other mistake, try to use different dictionaries to store each dataset
# create different empty dictionary
macs_f = {}
paras_f = {}
macs_c100 = {}
paras_c100 = {}
macs_c10 = {}
paras_c10 = {}

### usea function to call the models

In [None]:
# create a function for all the models to run
# image channel for fashion mnist 
channel_f = 1
# image channel for cifar100 and cifar10
channel_c = 3

# number of labels for fashion mnist
num_labels_f = 10
# number of labels for cifar100 
num_labels_c100 = 100
# number of labels for cifar10
num_labels_c10 = 10

def get_model_info(model, img_channel, num_labels):
    model_ini = model.__name__
    print(f'The model name is {model_ini}')

    net = model(img_channel, num_labels)

    # Use a generic context that works for both MPS and CPU
    with torch.device(device):
        macs, params = get_model_complexity_info(net, (img_channel, 224, 224), as_strings=True,
                                                print_per_layer_stat=True, verbose=True)
        print('{:<30}  {:<8}'.format('Computational complexity: ', macs))
        print('{:<30}  {:<8}'.format('Number of parameters: ', params))

    # add the macs and parameters to the dictionary
    if img_channel == 1:
        macs_f[model_ini] = macs
        paras_f[model_ini] = params
    else:
        if num_labels == 100:
            macs_c100[model_ini] = macs
            paras_c100[model_ini] = params
        else:
            macs_c10[model_ini] = macs
            paras_c10[model_ini] = params
    
    return net, macs, params

### ALEXNET

In [None]:
alexnet_f, macs_alexnet_f, paras_alexnet_f = get_model_info(alexnet, channel_f, num_labels_f)
print('-'*50)
alexnet_c100, macs_alexnet_c100, paras_alexnet_c100 = get_model_info(alexnet, channel_c, num_labels_c100)
print('-'*50)
alexnet_c10, macs_alexnet_c10, paras_alexnet_c10 = get_model_info(alexnet, channel_c, num_labels_c10)

The model name is alexnet
Sequential(
  57.03 M, 100.000% Params, 664.65 MMac, 99.736% MACs, 
  (0): Conv2d(7.81 k, 0.014% Params, 23.62 MMac, 3.544% MACs, 1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (1): ReLU(0, 0.000% Params, 193.6 KMac, 0.029% MACs, )
  (2): MaxPool2d(0, 0.000% Params, 193.6 KMac, 0.029% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (3): Conv2d(307.39 k, 0.539% Params, 224.09 MMac, 33.626% MACs, 64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (4): ReLU(0, 0.000% Params, 139.97 KMac, 0.021% MACs, )
  (5): MaxPool2d(0, 0.000% Params, 139.97 KMac, 0.021% MACs, kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (6): Conv2d(663.94 k, 1.164% Params, 112.21 MMac, 16.837% MACs, 192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (7): ReLU(0, 0.000% Params, 64.9 KMac, 0.010% MACs, )
  (8): Conv2d(884.99 k, 1.552% Params, 149.56 MMac, 22.443% MACs, 384, 256, kernel_size=(3, 3), stride=(1, 1)

### ResNet

#### ResNet18

In [None]:
resnet18_f, macs_resnet18_f, paras_resnet18_f = get_model_info(resnet18, channel_f, num_labels_f)
print('-'*50)
resnet18_c100, macs_resnet18_c100, paras_resnet18_c100 = get_model_info(resnet18, channel_c, num_labels_c100)
print('-'*50)
resnet18_c10, macs_resnet18_c10, paras_resnet18_c10 = get_model_info(resnet18, channel_c, num_labels_c10)

The model name is resnet18
Sequential(
  11.18 M, 100.000% Params, 1.74 GMac, 99.642% MACs, 
  (0): Sequential(
    3.33 k, 0.030% Params, 43.35 MMac, 2.477% MACs, 
    (0): Conv2d(3.2 k, 0.029% Params, 40.14 MMac, 2.294% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(128, 0.001% Params, 1.61 MMac, 0.092% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(0, 0.000% Params, 802.82 KMac, 0.046% MACs, )
    (3): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.046% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    148.22 k, 1.326% Params, 464.83 MMac, 26.563% MACs, 
    (0): Residual18(
      74.11 k, 0.663% Params, 232.42 MMac, 13.282% MACs, 
      (conv1): Conv2d(36.93 k, 0.330% Params, 115.81 MMac, 6.618% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(36.93 k, 0.330% Params, 115.81 MMac, 6.618% MACs, 64, 64, kernel_size=(3, 3), str

#### ResNet34

In [None]:
resnet34_f, macs_resnet34_f, paras_resnet34_f = get_model_info(resnet34, channel_f, num_labels_f)
print('-'*50)
resnet34_c100, macs_resnet34_c100, paras_resnet34_c100 = get_model_info(resnet34, channel_c, num_labels_c100)
print('-'*50)
resnet34_c10, macs_resnet34_c10, paras_resnet34_c10 = get_model_info(resnet34, channel_c, num_labels_c10)

The model name is resnet34
Sequential(
  21.29 M, 100.000% Params, 3.6 GMac, 99.756% MACs, 
  (0): Sequential(
    3.33 k, 0.016% Params, 43.35 MMac, 1.202% MACs, 
    (0): Conv2d(3.2 k, 0.015% Params, 40.14 MMac, 1.113% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(128, 0.001% Params, 1.61 MMac, 0.045% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(0, 0.000% Params, 802.82 KMac, 0.022% MACs, )
    (3): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.022% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    222.34 k, 1.044% Params, 697.25 MMac, 19.336% MACs, 
    (0): Residual34(
      74.11 k, 0.348% Params, 232.42 MMac, 6.445% MACs, 
      (conv1): Conv2d(36.93 k, 0.173% Params, 115.81 MMac, 3.212% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (conv2): Conv2d(36.93 k, 0.173% Params, 115.81 MMac, 3.212% MACs, 64, 64, kernel_size=(3, 3), strid

#### ResNet50

In [None]:
resnet50_f, macs_resnet50_f, paras_resnet50_f = get_model_info(resnet50, channel_f, num_labels_f)
print('-'*50)
resnet50_c100, macs_resnet50_c100, paras_resnet50_c100 = get_model_info(resnet50, channel_c, num_labels_c100)
print('-'*50)
resnet50_c10, macs_resnet50_c10, paras_resnet50_c10 = get_model_info(resnet50, channel_c, num_labels_c10)

The model name is resnet50
Sequential(
  13.87 M, 100.000% Params, 2.31 GMac, 99.503% MACs, 
  (0): Sequential(
    3.33 k, 0.024% Params, 43.35 MMac, 1.865% MACs, 
    (0): Conv2d(3.2 k, 0.023% Params, 40.14 MMac, 1.727% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): BatchNorm2d(128, 0.001% Params, 1.61 MMac, 0.069% MACs, 64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(0, 0.000% Params, 802.82 KMac, 0.035% MACs, )
    (3): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.035% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    136.32 k, 0.983% Params, 427.5 MMac, 18.393% MACs, 
    (0): Residual50(
      45.44 k, 0.328% Params, 142.5 MMac, 6.131% MACs, 
      (conv1): Conv2d(4.1 k, 0.030% Params, 12.85 MMac, 0.553% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, 0.001% Params, 401.41 KMac, 0.017% MACs, 64, eps=1e-05, momentum=0.1, affine=True, 

### VGG

#### VGG11

In [None]:
vgg11_f, macs_vgg11_f, paras_vgg11_f = get_model_info(vgg11, channel_f, num_labels_f)
print('-'*50)
vgg11_c100, macs_vgg11_c100, paras_vgg11_c100 = get_model_info(vgg11, channel_c, num_labels_c100)
print('-'*50)
vgg11_c10, macs_vgg11_c10, paras_vgg11_c10 = get_model_info(vgg11, channel_c, num_labels_c10)

The model name is vgg11
Sequential(
  128.81 M, 100.000% Params, 7.57 GMac, 99.643% MACs, 
  (0): Sequential(
    640, 0.000% Params, 38.54 MMac, 0.507% MACs, 
    (0): Conv2d(640, 0.000% Params, 32.11 MMac, 0.423% MACs, 1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 3.21 MMac, 0.042% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 3.21 MMac, 0.042% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    73.86 k, 0.057% Params, 929.66 MMac, 12.240% MACs, 
    (0): Conv2d(73.86 k, 0.057% Params, 926.45 MMac, 12.198% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 1.61 MMac, 0.021% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 1.61 MMac, 0.021% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (2): Sequential(
    885.25 k, 0.687% Params, 2.78 GMac, 36.582% MACs, 
    (0): Conv2d(295.17 k, 0.229% Params, 925.65 MMac, 12.187% MACs, 1

#### VGG13

In [None]:
vgg13_f, macs_vgg13_f, paras_vgg13_f = get_model_info(vgg13, channel_f, num_labels_f)
print('-'*50)
vgg13_c100, macs_vgg13_c100, paras_vgg13_c100 = get_model_info(vgg13, channel_c, num_labels_c100)
print('-'*50)
vgg13_c10, macs_vgg13_c10, paras_vgg13_c10 = get_model_info(vgg13, channel_c, num_labels_c10)

The model name is vgg13
Sequential(
  128.99 M, 100.000% Params, 11.28 GMac, 99.675% MACs, 
  (0): Sequential(
    37.57 k, 0.029% Params, 1.89 GMac, 16.746% MACs, 
    (0): Conv2d(640, 0.000% Params, 32.11 MMac, 0.284% MACs, 1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 3.21 MMac, 0.028% MACs, )
    (2): Conv2d(36.93 k, 0.029% Params, 1.85 GMac, 16.377% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 3.21 MMac, 0.028% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 3.21 MMac, 0.028% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    221.44 k, 0.172% Params, 2.78 GMac, 24.594% MACs, 
    (0): Conv2d(73.86 k, 0.057% Params, 926.45 MMac, 8.189% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 1.61 MMac, 0.014% MACs, )
    (2): Conv2d(147.58 k, 0.114% Params, 1.85 GMac, 16.363% MACs, 128, 128, kernel_size=

#### VGG16

In [None]:
vgg16_f, macs_vgg16_f, paras_vgg16_f = get_model_info(vgg16, channel_f, num_labels_f)
print('-'*50)
vgg16_c100, macs_vgg16_c100, paras_vgg16_c100 = get_model_info(vgg16, channel_c, num_labels_c100)
print('-'*50)
vgg16_c10, macs_vgg16_c10, paras_vgg16_c10 = get_model_info(vgg16, channel_c, num_labels_c10)

The model name is vgg16
Sequential(
  134.3 M, 100.000% Params, 15.44 GMac, 99.746% MACs, 
  (0): Sequential(
    37.57 k, 0.028% Params, 1.89 GMac, 12.239% MACs, 
    (0): Conv2d(640, 0.000% Params, 32.11 MMac, 0.207% MACs, 1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 3.21 MMac, 0.021% MACs, )
    (2): Conv2d(36.93 k, 0.027% Params, 1.85 GMac, 11.969% MACs, 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 3.21 MMac, 0.021% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 3.21 MMac, 0.021% MACs, kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    221.44 k, 0.165% Params, 2.78 GMac, 17.974% MACs, 
    (0): Conv2d(73.86 k, 0.055% Params, 926.45 MMac, 5.984% MACs, 64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(0, 0.000% Params, 1.61 MMac, 0.010% MACs, )
    (2): Conv2d(147.58 k, 0.110% Params, 1.85 GMac, 11.958% MACs, 128, 128, kernel_size=(

### MobileNet

#### MobileNetV1

In [None]:
mobilenetv1_f, macs_mobilenetv1_f, paras_mobilenetv1_f = get_model_info(MobileNet, channel_f, num_labels_f)
print('-'*50)
mobilenetv1_c100, macs_mobilenetv1_c100, paras_mobilenetv1_c100 = get_model_info(MobileNet, channel_c, num_labels_c100)
print('-'*50)
mobilenetv1_c10, macs_mobilenetv1_c10, paras_mobilenetv1_c10 = get_model_info(MobileNet, channel_c, num_labels_c10)

The model name is MobileNet
MobileNet(
  3.21 M, 100.000% Params, 566.8 MMac, 99.841% MACs, 
  (model): Sequential(
    3.21 M, 100.000% Params, 566.8 MMac, 99.841% MACs, 
    (0): Conv2d(320, 0.010% Params, 4.01 MMac, 0.707% MACs, 1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): BatchNorm2d(64, 0.002% Params, 802.82 KMac, 0.141% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(0, 0.000% Params, 401.41 KMac, 0.071% MACs, )
    (3): DepthwiseSeparableConv(
      2.43 k, 0.076% Params, 30.51 MMac, 5.374% MACs, 
      (depthwise): Conv2d(320, 0.010% Params, 4.01 MMac, 0.707% MACs, 32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32)
      (pointwise): Conv2d(2.11 k, 0.066% Params, 26.49 MMac, 4.667% MACs, 32, 64, kernel_size=(1, 1), stride=(1, 1))
    )
    (4): DepthwiseSeparableConv(
      8.96 k, 0.279% Params, 28.1 MMac, 4.950% MACs, 
      (depthwise): Conv2d(640, 0.020% Params, 2.01 MMac, 0.354% MACs, 64, 64, 

#### MobileNetV2

In [None]:
mobilenetv2_f, macs_mobilenetv2_f, paras_mobilenetv2_f = get_model_info(MobileNetV2, channel_f, num_labels_f)
print('-'*50)
mobilenetv2_c100, macs_mobilenetv2_c100, paras_mobilenetv2_c100 = get_model_info(MobileNetV2, channel_c, num_labels_c100)
print('-'*50)
mobilenetv2_c10, macs_mobilenetv2_c10, paras_mobilenetv2_c10 = get_model_info(MobileNetV2, channel_c, num_labels_c10)

The model name is MobileNetV2
MobileNetV2(
  2.24 M, 100.000% Params, 325.86 MMac, 99.895% MACs, 
  (first_layer): Sequential(
    352, 0.016% Params, 4.82 MMac, 1.477% MACs, 
    (0): Conv2d(288, 0.013% Params, 3.61 MMac, 1.108% MACs, 1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(64, 0.003% Params, 802.82 KMac, 0.246% MACs, 32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU6(0, 0.000% Params, 401.41 KMac, 0.123% MACs, inplace=True)
  )
  (inverted_residual_blocks): Sequential(
    1.81 M, 80.989% Params, 300.7 MMac, 92.185% MACs, 
    (0): InvertedResidual(
      1.98 k, 0.089% Params, 25.69 MMac, 7.876% MACs, 
      (layers): Sequential(
        1.98 k, 0.089% Params, 25.69 MMac, 7.876% MACs, 
        (0): Conv2d(1.02 k, 0.046% Params, 12.85 MMac, 3.938% MACs, 32, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, 0.003% Params, 802.82 KMac, 0.246% MACs, 32, eps=1e-05, momentum=0.

### GoogleNet

#### GoogleNet_origin

In [None]:
googlenet_f, macs_googlenet_f, paras_googlenet_f = get_model_info(Googlenet, channel_f, num_labels_f)
print('-'*50)
googlenet_c100, macs_googlenet_c100, paras_googlenet_c100 = get_model_info(Googlenet, channel_c, num_labels_c100)
print('-'*50)
googlenet_c10, macs_googlenet_c10, paras_googlenet_c10 = get_model_info(Googlenet, channel_c, num_labels_c10)

The model name is Googlenet
Sequential(
  5.98 M, 100.000% Params, 1.51 GMac, 99.192% MACs, 
  (0): Sequential(
    3.2 k, 0.054% Params, 41.75 MMac, 2.741% MACs, 
    (0): Conv2d(3.2 k, 0.054% Params, 40.14 MMac, 2.636% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.053% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.053% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 1.923% Params, 361.87 MMac, 23.760% MACs, 
    (0): Conv2d(4.16 k, 0.070% Params, 13.05 MMac, 0.857% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.013% MACs, )
    (2): Conv2d(110.78 k, 1.853% Params, 347.42 MMac, 22.811% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.040% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.040% MACs, kernel_size=3, stride=2, paddi

#### GoogleNet_mod1

In [None]:
googlenet_mod1_f, macs_googlenet_mod1_f, paras_googlenet_mod1_f = get_model_info(Googlenet_mod1, channel_f, num_labels_f)
print('-'*50)
googlenet_mod1_c100, macs_googlenet_mod1_c100, paras_googlenet_mod1_c100 = get_model_info(Googlenet_mod1, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod1_c10, macs_googlenet_mod1_c10, paras_googlenet_mod1_c10 = get_model_info(Googlenet_mod1, channel_c, num_labels_c10)

The model name is Googlenet_mod1
Sequential(
  4.61 M, 100.000% Params, 1.31 GMac, 99.181% MACs, 
  (0): Sequential(
    3.2 k, 0.069% Params, 41.75 MMac, 3.155% MACs, 
    (0): Conv2d(3.2 k, 0.069% Params, 40.14 MMac, 3.034% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.061% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.061% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 2.493% Params, 361.87 MMac, 27.350% MACs, 
    (0): Conv2d(4.16 k, 0.090% Params, 13.05 MMac, 0.986% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.015% MACs, )
    (2): Conv2d(110.78 k, 2.403% Params, 347.42 MMac, 26.258% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.046% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.046% MACs, kernel_size=3, stride=2, 

#### GoogleNet_mod2

In [None]:
googlenet_mod2_f, macs_googlenet_mod2_f, paras_googlenet_mod2_f = get_model_info(Googlenet_mod2, channel_f, num_labels_f)
print('-'*50)
googlenet_mod2_c100, macs_googlenet_mod2_c100, paras_googlenet_mod2_c100 = get_model_info(Googlenet_mod2, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod2_c10, macs_googlenet_mod2_c10, paras_googlenet_mod2_c10 = get_model_info(Googlenet_mod2, channel_c, num_labels_c10)

The model name is Googlenet_mod2
Sequential(
  5.03 M, 100.000% Params, 673.18 MMac, 98.622% MACs, 
  (0): Sequential(
    3.2 k, 0.064% Params, 41.75 MMac, 6.116% MACs, 
    (0): Conv2d(3.2 k, 0.064% Params, 40.14 MMac, 5.881% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.118% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.118% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 2.287% Params, 361.87 MMac, 53.015% MACs, 
    (0): Conv2d(4.16 k, 0.083% Params, 13.05 MMac, 1.911% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.029% MACs, )
    (2): Conv2d(110.78 k, 2.205% Params, 347.42 MMac, 50.898% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.088% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.088% MACs, kernel_size=3, stride=2

#### GoogleNet_mod3

In [None]:
googlenet_mod3_f, macs_googlenet_mod3_f, paras_googlenet_mod3_f = get_model_info(Googlenet_mod3, channel_f, num_labels_f)
print('-'*50)
googlenet_mod3_c100, macs_googlenet_mod3_c100, paras_googlenet_mod3_c100 = get_model_info(Googlenet_mod3, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod3_c10, macs_googlenet_mod3_c10, paras_googlenet_mod3_c10 = get_model_info(Googlenet_mod3, channel_c, num_labels_c10)

The model name is Googlenet_mod3
Sequential(
  4.94 M, 100.000% Params, 1.31 GMac, 99.141% MACs, 
  (0): Sequential(
    3.2 k, 0.065% Params, 41.75 MMac, 3.152% MACs, 
    (0): Conv2d(3.2 k, 0.065% Params, 40.14 MMac, 3.031% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.061% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.061% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 2.326% Params, 361.87 MMac, 27.321% MACs, 
    (0): Conv2d(4.16 k, 0.084% Params, 13.05 MMac, 0.985% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.015% MACs, )
    (2): Conv2d(110.78 k, 2.242% Params, 347.42 MMac, 26.230% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.045% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.045% MACs, kernel_size=3, stride=2, 

#### GoogleNet_mod4

In [None]:
googlenet_mod4_f, macs_googlenet_mod4_f, paras_googlenet_mod4_f = get_model_info(Googlenet_mod4, channel_f, num_labels_f)
print('-'*50)
googlenet_mod4_c100, macs_googlenet_mod4_c100, paras_googlenet_mod4_c100 = get_model_info(Googlenet_mod4, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod4_c10, macs_googlenet_mod4_c10, paras_googlenet_mod4_c10 = get_model_info(Googlenet_mod4, channel_c, num_labels_c10)

The model name is Googlenet_mod4
Sequential(
  5.29 M, 100.000% Params, 1.41 GMac, 99.298% MACs, 
  (0): Sequential(
    3.2 k, 0.060% Params, 41.75 MMac, 2.943% MACs, 
    (0): Conv2d(3.2 k, 0.060% Params, 40.14 MMac, 2.829% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.057% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.057% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 2.171% Params, 361.87 MMac, 25.508% MACs, 
    (0): Conv2d(4.16 k, 0.079% Params, 13.05 MMac, 0.920% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.014% MACs, )
    (2): Conv2d(110.78 k, 2.093% Params, 347.42 MMac, 24.489% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.042% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.042% MACs, kernel_size=3, stride=2, 

#### GoogleNet_mod5

In [None]:
googlenet_mod5_f, macs_googlenet_mod5_f, paras_googlenet_mod5_f = get_model_info(Googlenet_mod5, channel_f, num_labels_f)
print('-'*50)
googlenet_mod5_c100, macs_googlenet_mod5_c100, paras_googlenet_mod5_c100 = get_model_info(Googlenet_mod5, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod5_c10, macs_googlenet_mod5_c10, paras_googlenet_mod5_c10 = get_model_info(Googlenet_mod5, channel_c, num_labels_c10)

The model name is Googlenet_mod5
Sequential(
  3.42 M, 100.000% Params, 1.08 GMac, 99.249% MACs, 
  (0): Sequential(
    3.2 k, 0.094% Params, 41.75 MMac, 3.824% MACs, 
    (0): Conv2d(3.2 k, 0.094% Params, 40.14 MMac, 3.677% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.074% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.074% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 3.364% Params, 361.87 MMac, 33.146% MACs, 
    (0): Conv2d(4.16 k, 0.122% Params, 13.05 MMac, 1.195% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.018% MACs, )
    (2): Conv2d(110.78 k, 3.242% Params, 347.42 MMac, 31.822% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.055% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.055% MACs, kernel_size=3, stride=2, 

#### GoogleNet_mod6

In [None]:
googlenet_mod6_f, macs_googlenet_mod6_f, paras_googlenet_mod6_f = get_model_info(Googlenet_mod6, channel_f, num_labels_f)
print('-'*50)
googlenet_mod6_c100, macs_googlenet_mod6_c100, paras_googlenet_mod6_c100 = get_model_info(Googlenet_mod6, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod6_c10, macs_googlenet_mod6_c10, paras_googlenet_mod6_c10 = get_model_info(Googlenet_mod6, channel_c, num_labels_c10)

The model name is Googlenet_mod6


Sequential(
  7.28 M, 100.000% Params, 1.86 GMac, 99.445% MACs, 
  (0): Sequential(
    3.2 k, 0.044% Params, 41.75 MMac, 2.232% MACs, 
    (0): Conv2d(3.2 k, 0.044% Params, 40.14 MMac, 2.146% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.043% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.043% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 1.579% Params, 361.87 MMac, 19.344% MACs, 
    (0): Conv2d(4.16 k, 0.057% Params, 13.05 MMac, 0.697% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.011% MACs, )
    (2): Conv2d(110.78 k, 1.522% Params, 347.42 MMac, 18.572% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.032% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.032% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=

#### GoogleNet_mod7

In [None]:
googlenet_mod7_f, macs_googlenet_mod7_f, paras_googlenet_mod7_f = get_model_info(Googlenet_mod7, channel_f, num_labels_f)
print('-'*50)
googlenet_mod7_c100, macs_googlenet_mod7_c100, paras_googlenet_mod7_c100 = get_model_info(Googlenet_mod7, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod7_c10, macs_googlenet_mod7_c10, paras_googlenet_mod7_c10 = get_model_info(Googlenet_mod7, channel_c, num_labels_c10)

The model name is Googlenet_mod7
Sequential(
  11.71 M, 100.000% Params, 2.73 GMac, 99.543% MACs, 
  (0): Sequential(
    3.2 k, 0.027% Params, 41.75 MMac, 1.520% MACs, 
    (0): Conv2d(3.2 k, 0.027% Params, 40.14 MMac, 1.462% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.029% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.029% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 0.982% Params, 361.87 MMac, 13.176% MACs, 
    (0): Conv2d(4.16 k, 0.036% Params, 13.05 MMac, 0.475% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.007% MACs, )
    (2): Conv2d(110.78 k, 0.946% Params, 347.42 MMac, 12.650% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.022% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.022% MACs, kernel_size=3, stride=2,

#### GoogleNet_mod8

In [None]:
googlenet_mod8_f, macs_googlenet_mod8_f, paras_googlenet_mod8_f = get_model_info(Googlenet_mod8, channel_f, num_labels_f)
print('-'*50)
googlenet_mod8_c100, macs_googlenet_mod8_c100, paras_googlenet_mod8_c100 = get_model_info(Googlenet_mod8, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod8_c10, macs_googlenet_mod8_c10, paras_googlenet_mod8_c10 = get_model_info(Googlenet_mod8, channel_c, num_labels_c10)

The model name is Googlenet_mod8
Sequential(
  16.7 M, 100.000% Params, 3.7 GMac, 99.604% MACs, 
  (0): Sequential(
    3.2 k, 0.019% Params, 41.75 MMac, 1.122% MACs, 
    (0): Conv2d(3.2 k, 0.019% Params, 40.14 MMac, 1.079% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.022% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.022% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 0.688% Params, 361.87 MMac, 9.730% MACs, 
    (0): Conv2d(4.16 k, 0.025% Params, 13.05 MMac, 0.351% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.005% MACs, )
    (2): Conv2d(110.78 k, 0.663% Params, 347.42 MMac, 9.341% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.016% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.016% MACs, kernel_size=3, stride=2, pad

#### GoogleNet_mod9

In [None]:
googlenet_mod9_f, macs_googlenet_mod9_f, paras_googlenet_mod9_f = get_model_info(Googlenet_mod9, channel_f, num_labels_f)
print('-'*50)
googlenet_mod9_c100, macs_googlenet_mod9_c100, paras_googlenet_mod9_c100 = get_model_info(Googlenet_mod9, channel_c, num_labels_c100)
print('-'*50)
googlenet_mod9_c10, macs_googlenet_mod9_c10, paras_googlenet_mod9_c10 = get_model_info(Googlenet_mod9, channel_c, num_labels_c10)

The model name is Googlenet_mod9
Sequential(
  120.07 k, 100.000% Params, 403.77 MMac, 98.458% MACs, 
  (0): Sequential(
    3.2 k, 2.665% Params, 41.75 MMac, 10.180% MACs, 
    (0): Conv2d(3.2 k, 2.665% Params, 40.14 MMac, 9.788% MACs, 1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU(0, 0.000% Params, 802.82 KMac, 0.196% MACs, )
    (2): MaxPool2d(0, 0.000% Params, 802.82 KMac, 0.196% MACs, kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    114.94 k, 95.728% Params, 361.87 MMac, 88.241% MACs, 
    (0): Conv2d(4.16 k, 3.465% Params, 13.05 MMac, 3.181% MACs, 64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU(0, 0.000% Params, 200.7 KMac, 0.049% MACs, )
    (2): Conv2d(110.78 k, 92.263% Params, 347.42 MMac, 84.718% MACs, 64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(0, 0.000% Params, 602.11 KMac, 0.147% MACs, )
    (4): MaxPool2d(0, 0.000% Params, 602.11 KMac, 0.147% MACs, kernel_size=3, str

In [None]:
# print out the dictionaries of parameters and macs in each dataset
print(macs_f)
print(paras_f)

print('-'*50)

print(macs_c100)
print(paras_c100)

print('-'*50)

print(macs_c10)
print(paras_c10)

print(current_path)

{'alexnet': '666.41 MMac', 'resnet18': '1.75 GMac', 'resnet34': '3.61 GMac', 'resnet50': '2.32 GMac', 'vgg11': '7.6 GMac', 'vgg13': '11.31 GMac', 'vgg16': '15.48 GMac', 'MobileNet': '567.7 MMac', 'MobileNetV2': '326.2 MMac', 'Googlenet': '1.52 GMac', 'Googlenet_mod1': '1.32 GMac', 'Googlenet_mod2': '682.58 MMac', 'Googlenet_mod3': '1.32 GMac', 'Googlenet_mod4': '1.42 GMac', 'Googlenet_mod5': '1.09 GMac', 'Googlenet_mod6': '1.87 GMac', 'Googlenet_mod7': '2.75 GMac', 'Googlenet_mod8': '3.72 GMac', 'Googlenet_mod9': '410.09 MMac'}
{'alexnet': '57.03 M', 'resnet18': '11.18 M', 'resnet34': '21.29 M', 'resnet50': '13.87 M', 'vgg11': '128.81 M', 'vgg13': '128.99 M', 'vgg16': '134.3 M', 'MobileNet': '3.21 M', 'MobileNetV2': '2.24 M', 'Googlenet': '5.98 M', 'Googlenet_mod1': '4.61 M', 'Googlenet_mod2': '5.03 M', 'Googlenet_mod3': '4.94 M', 'Googlenet_mod4': '5.29 M', 'Googlenet_mod5': '3.42 M', 'Googlenet_mod6': '7.28 M', 'Googlenet_mod7': '11.71 M', 'Googlenet_mod8': '16.7 M', 'Googlenet_mod9'

In [None]:
# save the parameters and macs to the csv file
# create a dataframe to store the parameters and macs
macs_paras = pd.DataFrame(list(zip(macs_f.keys(), 
                                   macs_f.values(), paras_f.values(), 
                                   macs_c100.values(), paras_c100.values(), 
                                   macs_c10.values(), paras_c10.values())), 
                                   columns=['Model', 'MACs_f', 'Parameters_f', 'MACs_c100', 'Parameters_c100', 'MACs_c10', 'Parameters_c10'])
macs_paras.head()

macs_paras.to_csv(str(current_path) + '/macs_paras.csv', index=False)

## Import the Datasets for training

### show the output size of each layers after the picture is passed through the model

In [44]:
print(models_name)

['alexnet', 'vgg11', 'vgg13', 'vgg16', 'resnet18', 'resnet34', 'resnet50', 'googlenet_origin', 'googlenet_mod1', 'googlenet_mod2', 'googlenet_mod3', 'googlenet_mod4', 'googlenet_mod5', 'googlenet_mod6', 'googlenet_mod7', 'googlenet_mod8', 'googlenet_mod9', 'mobilenetv1_path', 'mobilenetv2_path']


In [45]:
# create model list according to models_name order
models_f_list = [alexnet_f, 
                vgg11_f, vgg13_f, vgg16_f,
                resnet18_f, resnet34_f, resnet50_f,
                googlenet_f, googlenet_mod1_f, googlenet_mod2_f, googlenet_mod3_f, 
                googlenet_mod4_f, googlenet_mod5_f, googlenet_mod6_f, googlenet_mod7_f,
                googlenet_mod8_f, googlenet_mod9_f,
                mobilenetv1_f, mobilenetv2_f]

models_c100_list = [alexnet_c100,
                vgg11_c100, vgg13_c100, vgg16_c100,
                resnet18_c100, resnet34_c100, resnet50_c100,
                googlenet_c100, googlenet_mod1_c100, googlenet_mod2_c100, googlenet_mod3_c100, 
                googlenet_mod4_c100, googlenet_mod5_c100, googlenet_mod6_c100, googlenet_mod7_c100,
                googlenet_mod8_c100, googlenet_mod9_c100,
                mobilenetv1_c100, mobilenetv2_c100]

models_c10_list = [alexnet_c10,
                vgg11_c10, vgg13_c10, vgg16_c10,
                resnet18_c10, resnet34_c10, resnet50_c10,
                googlenet_c10, googlenet_mod1_c10, googlenet_mod2_c10, googlenet_mod3_c10, 
                googlenet_mod4_c10, googlenet_mod5_c10, googlenet_mod6_c10, googlenet_mod7_c10,
                googlenet_mod8_c10, googlenet_mod9_c10,
                mobilenetv1_c10, mobilenetv2_c10]

In [46]:
# X_f = torch.randn(size=(1, 1, 224, 224), dtype=torch.float32) # fashion mnist

# for model in models_f_list:
#     print(model)
#     for layer in model:
#         X_f=layer(X_f)
#         print(layer.__class__.__name__,'output shape:\t',X_f.shape)
    

### load all the datas:  
    1. FashionMNIST
    2. CIFAR100
    3. CIFAR10

In [47]:
# load the data
# fashion mnist
def get_dataloader_workers():
    """Use 4 processes to read the data.

    Defined in :numref:`sec_utils`"""
    return 4

def load_data_fashion_mnist(batch_size, resize=None):
    """下载Fashion-MNIST数据集, 然后将其加载到内存中

    Defined in :numref:`sec_fashion_mnist`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(mnist_train, batch_size, shuffle=True,
                            num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(mnist_test, batch_size, shuffle=False,
                            num_workers=get_dataloader_workers()))

def load_data_cifar100(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_utils`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # import the cifar100 dataset
    cifar_train = torchvision.datasets.CIFAR100(
        root="../data", train=True, transform=trans, download=True)
    cifar_test = torchvision.datasets.CIFAR100(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,
                                        num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,
                                        num_workers=get_dataloader_workers()))
    
def load_data_cifar10(batch_size, resize=None):
    """Download the Fashion-MNIST dataset and then load it into memory.

    Defined in :numref:`sec_utils`"""
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # import the cifar100 dataset
    cifar_train = torchvision.datasets.CIFAR10(
        root="../data", train=True, transform=trans, download=True)
    cifar_test = torchvision.datasets.CIFAR10(
        root="../data", train=False, transform=trans, download=True)
    return (torch.utils.data.DataLoader(cifar_train, batch_size, shuffle=True,
                                        num_workers=get_dataloader_workers()),
            torch.utils.data.DataLoader(cifar_test, batch_size, shuffle=False,
                                        num_workers=get_dataloader_workers()))
    

### set the training parameters

In [48]:
batch_size = [128]
epochs = [5]
rounds = 1
lr = 0.01

## Train Function

#### using pynvml to get the GPU power consumption

In [50]:
def nvml_sampling_thread(handle, filename, stop_event, sampling_interval):
    """
    在单独的线程中定期调用 NVML, 获取功耗数据并存储到 data_queue 中。
    参数：
    - handle: nvmlDeviceGetHandleByIndex(0) 得到的 GPU 句柄
    - data_queue: 用于存放 (timestamp, power_in_watts) 数据的队列
    - stop_event: 当此事件被设置时，线程应结束循环
    - sampling_interval: 采样间隔（秒）
    """
    with open(filename/'energy_consumption_file.csv', 'a') as f:  # 追加模式
        # 写入列名
        f.write("timestamp,power_in_watts\n")
        while not stop_event.is_set():
            try:
                # 采集功率和时间戳
                current_time = time.time()
                current_power = pynvml.nvmlDeviceGetPowerUsage(handle) / 1000.0  # 转换 mW -> W
                # 写入文件
                f.write(f"{current_time},{current_power}\n")
                # 等待下一次采样
                time.sleep(sampling_interval)
            except pynvml.NVMLError as e:
                print(f"NVML Error: {e}")
                break

#### set the interval of the power consumption

In [51]:
def integrate_power_over_interval(samples, start_time, end_time):
    # 假定 samples是按时间升序排序的 (t, p)
    # 若未排序，请先排序:
    # samples = sorted(samples, key=lambda x: x[0])
    
    def interpolate(samples, target_time):
        # 在 samples 中找到 target_time 左右最近的两个点，并进行线性插值
        # 若 target_time 恰好等于某个样本点时间，直接返回该点功率
        # 若无法找到两侧点（如 target_time在样本时间轴外），根据情况返回None或边界点
        n = len(samples)
        if n == 0:
            return None
        # 若 target_time 小于第一个样本点时间，无法向左插值，这里直接返回第一个点的功率值(或None)
        if target_time <= samples[0][0]:
            # 简化处理：返回最早样本点的功率（或None）
            return samples[0][1]
        # 若 target_time 大于最后一个样本点时间，无法向右插值，返回最后一个点的功率（或None）
        if target_time >= samples[-1][0]:
            return samples[-1][1]

        # 否则，在中间插值
        # 使用二分查找快速定位
        import bisect
        times = [t for t, _ in samples]
        pos = bisect.bisect_left(times, target_time)
        # pos是使times保持有序插入target_time的位置
        # 因为target_time不在已有样本点中，pos不会越界且pos>0且pos<n
        t1, p1 = samples[pos-1]
        t2, p2 = samples[pos]
        # 线性插值： p = p1 + (p2 - p1)*((target_time - t1)/(t2 - t1))
        ratio = (target_time - t1) / (t2 - t1)
        p = p1 + (p2 - p1)*ratio
        return p

    # 从原始 samples 中筛选出位于[start_time, end_time]内的点
    filtered = [(t, p) for t, p in samples if start_time <= t <= end_time]

    # 如果不足2个点，则尝试使用插值
    if len(filtered) < 2:
        # 无论如何都需要在边界处插值出两个点(起码start和end)
        start_power = interpolate(samples, start_time)
        end_power = interpolate(samples, end_time)

        # 如果从样本中无法插值出任何有意义的点（比如samples为空或无法插值），返回0.0
        if start_power is None or end_power is None:
            return 0.0

        # 将插值的边界点加入到 filtered
        # 注意：如果filtered中有一个点在区间内，我们也需要确保边界有两点以上
        # 例如filtered只有一个点在中间，则需要在start和end插值点全部加入。
        # 若filtered为空，则只用start/end两点插值点求积分
        new_filtered = [(start_time, start_power)] + filtered + [(end_time, end_power)]
        # 确保按时间排序
        new_filtered.sort(key=lambda x: x[0])
        filtered = new_filtered

    # 正常积分计算
    if len(filtered) < 2:
        # 经过插值仍不够，返回0
        return 0.0

    total_energy = 0.0
    for i in range(len(filtered)-1):
        t1, p1 = filtered[i]
        t2, p2 = filtered[i+1]
        dt = t2 - t1
        avg_p = (p1 + p2)/2.0
        total_energy += avg_p * dt

    return total_energy

#### set the training process

In [52]:
def train_func(net, train_iter, test_iter, num_epochs, lr, device, filename, sampling_interval):
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    print('training on', device)
    net.to(device)
    # print(f'The name of the layers are: {alexlayer}')
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    # save all epochs time data using list
    to_device_intervals_total = []
    forward_intervals_total = []
    loss_intervals_total = []
    backward_intervals_total = []
    optimize_intervals_total = []
    test_intervals_total = []

    # create a list to store the epoch time data
    epoch_intervals_total = []
    
    # 初始化NVML和采样线程
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(0)
    power_data_queue = queue.Queue()
    stop_event = threading.Event()
    sampler_thread = threading.Thread(target=nvml_sampling_thread, args=(handle, filename, stop_event, sampling_interval))
    sampler_thread.start()

    for epoch in range(num_epochs):
        print('The epoch is:', epoch+1)
        metric = d2l.Accumulator(3)  # train_loss, train_acc, num_examples
        to_device_intervals_epoch = []  # 用来记录本epoch每个batch的to_device时间段
        forward_intervals_epoch = []  # 用来记录本epoch每个batch的forward时间段
        loss_intervals_epoch = []  # 用来记录本epoch每个batch的loss时间段
        backward_intervals_epoch = [] 
        optimize_intervals_epoch = []
        test_intervals_epoch = []   
        epoch_intervals_epoch = []  # 用来记录本epoch的时间段

        epoch_start_time = time.time()

        net.train()
        for i, (X, y) in enumerate(train_iter):
            print('The batch is:', i+1)
            optimizer.zero_grad()
            torch.cuda.synchronize()

            # 记录to_device前后的时间戳
            start_ttd_time = time.time()
            X, y = X.to(device), y.to(device)
            torch.cuda.synchronize()
            end_ttd_time = time.time()
            to_device_intervals_epoch.append((start_ttd_time, end_ttd_time))

            # forward
            start_forward_time = time.time()
            y_hat = net(X)
            torch.cuda.synchronize()
            end_forward_time = time.time()
            forward_intervals_epoch.append((start_forward_time, end_forward_time))

            # loss
            start_loss_time = time.time()
            l = loss_fn(y_hat, y)
            torch.cuda.synchronize()
            end_loss_time = time.time()
            loss_intervals_epoch.append((start_loss_time, end_loss_time))

            # backward
            start_backward_time = time.time()
            l.backward()
            torch.cuda.synchronize()
            end_backward_time = time.time()
            backward_intervals_epoch.append((start_backward_time, end_backward_time))

            # optimize
            start_optimize_time = time.time()
            optimizer.step()
            torch.cuda.synchronize()
            end_optimize_time = time.time()
            optimize_intervals_epoch.append((start_optimize_time, end_optimize_time))

            with torch.no_grad():
                metric.add(l*X.shape[0], d2l.accuracy(y_hat, y), X.shape[0])
            train_acc = metric[1] / metric[2]

        start_test_time = time.time()
        test_acc = d2l.evaluate_accuracy_gpu(net, test_iter)
        end_test_time = time.time()
        print(f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')
        test_intervals_epoch.append((start_test_time, end_test_time))

        epoch_end_time = time.time()
        epoch_intervals_epoch.append((epoch_start_time, epoch_end_time))

        # data need to be saved
        # add the intervals_epoch to intervals_total
        to_device_intervals_total.append(to_device_intervals_epoch)
        forward_intervals_total.append(forward_intervals_epoch)
        loss_intervals_total.append(loss_intervals_epoch)
        backward_intervals_total.append(backward_intervals_epoch)
        optimize_intervals_total.append(optimize_intervals_epoch)
        test_intervals_total.append(test_intervals_epoch)
        epoch_intervals_total.append(epoch_intervals_epoch)


    # 训练结束后关闭线程
    stop_event.set()
    sampler_thread.join()

    pynvml.nvmlShutdown()

    return to_device_intervals_total, forward_intervals_total, loss_intervals_total, backward_intervals_total, optimize_intervals_total, test_intervals_total, epoch_intervals_total

## Start to train the model

### set a function to train the model with FashionMNIST datasets

In [1]:
def train_model(main_folder, batch_size, num_epochs, round, lr, device, sample_interval, net):
    print(f'The epoch is set: {num_epochs}, batch is set: {batch_size}, is in {round+1}th running')
    # create the folder to store the data
    # epoch_batch_folder = main_folder/f'E{num_epochs}_B{batch_size}_R{round}'
    sr_number = int(sample_interval*1000)
    epoch_batch_folder = f'E{num_epochs}_B{batch_size}_R{round}_SR{sr_number}'
    dataset_dir = 'fashion_mnist'
    # dataset_dir = 'cifar100'
    # dataset_dir = 'cifar10'

    # the folder path is main_folder/epoch_batch_folder
    folder_path = main_folder/epoch_batch_folder/dataset_dir
    print(f'The folder path is: {folder_path}')
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    else:
        pass
    
    train_iter, test_iter = load_data_fashion_mnist(batch_size, resize=224)
    # show the shape of the data
    list_of_i = []
    for i, (X, y) in enumerate(train_iter):
        if i < 3:
            print('the shape of the', i, 'batch of the train_iter is:', X.shape)
        else:
            pass
        list_of_i.append(i)
    print(f'The number of batches is: {np.array(list_of_i).shape}')
    to_device_intervals_total, forward_intervals_total, loss_intervals_total,\
          backward_intervals_total, optimize_intervals_total, test_intervals_total, epoch_intervals_total = train_func(net, train_iter, test_iter, num_epochs, lr, device, folder_path, sample_interval)

    # transfer the data to the numpy array
    to_device_data = np.array(to_device_intervals_total)
    forward_time = np.array(forward_intervals_total)
    loss_time = np.array(loss_intervals_total)
    backward_time = np.array(backward_intervals_total)
    optimize_time = np.array(optimize_intervals_total)
    test_time = np.array(test_intervals_total)
    epoch_time = np.array(epoch_intervals_total)

    # save the data
    np.save(folder_path/'to_device.npy', to_device_data, allow_pickle=True)
    np.save(folder_path/'forward.npy', forward_time, allow_pickle=True)
    np.save(folder_path/'loss.npy', loss_time, allow_pickle=True)
    np.save(folder_path/'backward.npy', backward_time, allow_pickle=True)
    np.save(folder_path/'optimize.npy', optimize_time, allow_pickle=True)
    np.save(folder_path/'test.npy', test_time, allow_pickle=True)
    np.save(folder_path/'epoch.npy', epoch_time, allow_pickle=True)


### run the model

In [54]:
sampling_interval = 0.002 # 2ms
# create the folder to store the data

'''由于 2024.12.15 晚上训练的时候电脑自动锁屏导致训练停止,所以需要对后续的内容进行继续训练'''
for i in range(len(models_f_list)-5):
    # for each start, clear the cache in the gpu
    torch.cuda.empty_cache()
    net = models_f_list[i+5]
    main_folder = DataList[i+5]  
    print('The folder is:', main_folder)
    if main_folder.exists():
        print("文件存在。")
    else:
        os.makedirs(main_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", main_folder)
    for epoch in epochs:
        for batch in batch_size:
            for round in range(rounds):
                train_model(main_folder, batch, epoch, round, lr, device, sampling_interval, net)

The folder is: /root/autodl-tmp/GreenAI/3080/ModelsData/resnet34
文件存在。
The epoch is set: 5, batch is set: 128, is in 1th running
The folder path is: /root/autodl-tmp/GreenAI/3080/ModelsData/resnet34/E5_B128_R0_SR2


the shape of the 0 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 1 batch of the train_iter is: torch.Size([128, 1, 224, 224])
the shape of the 2 batch of the train_iter is: torch.Size([128, 1, 224, 224])
The number of batches is: (469,)
training on cuda
The epoch is: 1
The batch is: 1
The batch is: 2
The batch is: 3
The batch is: 4
The batch is: 5
The batch is: 6
The batch is: 7
The batch is: 8
The batch is: 9
The batch is: 10
The batch is: 11
The batch is: 12
The batch is: 13
The batch is: 14
The batch is: 15
The batch is: 16
The batch is: 17
The batch is: 18
The batch is: 19
The batch is: 20
The batch is: 21
The batch is: 22
The batch is: 23
The batch is: 24
The batch is: 25
The batch is: 26
The batch is: 27
The batch is: 28
The batch is: 29
The batch is: 30
The batch is: 31
The batch is: 32
The batch is: 33
The batch is: 34
The batch is: 35
The batch is: 36
The batch is: 37
The batch is: 38
The batch is: 39
The batch is: 40
The batch is: 41
The batch is

In [None]:
sampling_interval = 0.002 # 2ms
# create the folder to store the data

'''由于 2024.12.15 晚上训练的时候电脑自动锁屏导致训练停止,所以需要对后续的内容进行继续训练'''
'''2024.12.17-YJ: training is finished, so change back to the original code'''
for i in range(len(models_f_list)):
    # for each start, clear the cache in the gpu
    torch.cuda.empty_cache()
    net = models_f_list[i]
    main_folder = DataList[i]  
    print('The folder is:', main_folder)
    if main_folder.exists():
        print("文件存在。")
    else:
        os.makedirs(main_folder)
        print("文件不存在，已创建。")
        print("文件创建于：", main_folder)
    for epoch in epochs:
        for batch in batch_size:
            for round in range(rounds):
                train_model(main_folder, batch, epoch, round, lr, device, sampling_interval, net)