# 神经网络架构搜索(NAS)实践

## 任务：做一个简单的NAS算法

网格搜索（grid search）的算法思路：枚举搜索空间中所有可能的超参数组合，逐一训练并评估，最后选出满足约束且效果最好的那一个。




加载python函数库

In [None]:
import copy
import itertools
import random

import numpy as np
from tqdm.auto import tqdm

import torch
from torch import nn
from torch.optim import *
from torch.optim.lr_scheduler import *
from torch.utils.data import DataLoader
from torchprofile import profile_macs
from torchvision.datasets import *
from torchvision.transforms import *
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms


# Seed the Python, NumPy and PyTorch random number generators with the same
# fixed value so that repeated runs start from the same random state.
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)

# 加载数据集和定义训练过程

In [None]:
# Preprocessing: convert each image to a tensor, then normalize it with
# mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# MNIST splits (train=True selects the training set, train=False the test
# set); the files are downloaded into the root directory when missing.
train_dataset = datasets.MNIST(
    root='../ch02/data/mnist',
    train=True,
    download=True,
    transform=transform,
)
test_dataset = datasets.MNIST(
    root='../ch02/data/mnist',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batch loaders: the training data is reshuffled every epoch, the test
# data keeps its original order.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
def train(
    model: nn.Module,
    dataloader: DataLoader,
    criterion: nn.Module,
    optimizer: Optimizer,
    callbacks=None,
) -> None:
    """Train ``model`` for one full pass over ``dataloader``.

    The forward pass runs on whichever device the model's parameters live
    on, so the function works on CPU-only machines as well as on a GPU.
    The loss is computed on the CPU against the (CPU) targets.

    Args:
        model: Network to optimize; it is switched to training mode.
        dataloader: Yields ``(inputs, targets)`` batches.
        criterion: Loss called as ``criterion(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        callbacks: Optional iterable of zero-argument callables, each
            invoked once after every optimizer step.
    """
    model.train()

    # Follow the model instead of hard-coding CUDA; a parameter-free model
    # falls back to the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    for inputs, targets in tqdm(dataloader, desc='train', leave=False):
        # Reset the gradients left over from the previous iteration
        optimizer.zero_grad()

        # Forward pass on the model's device, results brought back to the CPU
        outputs = model(inputs.to(device)).cpu()
        loss = criterion(outputs, targets)

        # Backward propagation
        loss.backward()

        # Parameter update
        optimizer.step()

        if callbacks is not None:
            for callback in callbacks:
                callback()

In [None]:
@torch.inference_mode()
def evaluate(
    model: nn.Module,
    dataloader: DataLoader,
    extra_preprocess=None,
) -> float:
    """Return the classification accuracy of ``model`` in percent.

    The forward pass runs on whichever device the model's parameters live
    on, so the function works on CPU-only machines as well as on a GPU.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        dataloader: Yields ``(inputs, targets)`` batches.
        extra_preprocess: Optional iterable of callables applied in order
            to every input batch before it is fed to the model.

    Returns:
        The percentage (0-100) of samples whose arg-max prediction along
        dimension 1 equals the target.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.eval()

    # Follow the model instead of hard-coding CUDA; a parameter-free model
    # falls back to the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    num_samples = 0
    num_correct = 0

    for inputs, targets in tqdm(dataloader, desc="eval", leave=False):
        if extra_preprocess is not None:
            for preprocess in extra_preprocess:
                inputs = preprocess(inputs)

        # Inference on the model's device, results brought back to the CPU
        outputs = model(inputs.to(device)).cpu()

        # Convert logits to class indices
        outputs = outputs.argmax(dim=1)

        # Update metrics
        num_samples += targets.size(0)
        num_correct += (outputs == targets).sum()

    return (num_correct / num_samples * 100).item()

In [None]:
def get_model_flops(model, inputs):
    """Return the value ``profile_macs`` reports for ``model`` on ``inputs``.

    NOTE(review): despite the function name, the result is whatever
    ``profile_macs`` counts (presumably multiply-accumulate operations,
    not FLOPs) - confirm against the torchprofile documentation.
    """
    return profile_macs(model, inputs)

def get_model_size(model: nn.Module, data_width=32):
    """Return the size of the model's parameters in bits.

    :param model: module whose parameters are counted
    :param data_width: number of bits used to store one element
    :return: total number of parameter elements multiplied by ``data_width``
    """
    total_elements = sum(tensor.numel() for tensor in model.parameters())
    return total_elements * data_width

# Size units expressed in bits, so that a bit count divided by one of them
# gives the size in that unit.
Byte = 8
KiB = Byte << 10
MiB = Byte << 20
GiB = Byte << 30

# 神经网络架构搜索

## 任务：在给定的模型大小约束下，找到准确率最高的网络结构

搜索方法：Grid search

搜索空间：以LeNet为基础，为每个结构超参数（卷积通道数、卷积核大小、全连接层宽度）定义若干候选值，遍历所有组合；先检查模型大小是否满足约束，再依次训练并评估满足约束的模型

输出：最终输出合适的神经网络结构，并保存




In [None]:
class LeNet(nn.Module):
    """Configurable LeNet-style classifier for square images.

    Architecture: conv1 -> ReLU -> 2x2 max-pool -> conv2 -> ReLU ->
    2x2 max-pool -> fc1 -> ReLU -> fc2 -> ReLU -> fc3 (logits).

    Args:
        conv1_channel: Number of output channels of the first convolution.
        conv1_kernel: Kernel size of the first convolution.
        conv2_channel: Number of output channels of the second convolution.
        conv2_kernel: Kernel size of the second convolution.
        fc1_size: Width of the first fully connected layer.
        fc2_size: Width of the second fully connected layer.
        num_classes: Number of output logits.
        in_channels: Number of channels of the input images.
        input_size: Height (= width) of the input images in pixels.

    Raises:
        ValueError: If the kernel sizes leave no spatial extent after the
            two convolution + pooling stages.
    """

    def __init__(self, conv1_channel=6, conv1_kernel=5, conv2_channel=16, conv2_kernel=5,
                 fc1_size=128, fc2_size=84, num_classes=10, in_channels=1, input_size=28):
        super().__init__()

        # Spatial size after each stage: an un-padded convolution shrinks the
        # side by (kernel - 1), the 2x2 pooling then halves it (floor).
        pooled1_size = (input_size - conv1_kernel + 1) // 2
        pooled2_size = (pooled1_size - conv2_kernel + 1) // 2
        # Without this check a negative size would be squared below into a
        # plausible-looking positive feature count.
        if pooled1_size < 1 or pooled2_size < 1:
            raise ValueError(
                f"kernel sizes ({conv1_kernel}, {conv2_kernel}) are too large "
                f"for a {input_size}x{input_size} input"
            )

        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=conv1_channel, kernel_size=conv1_kernel)
        self.conv2 = nn.Conv2d(in_channels=conv1_channel, out_channels=conv2_channel, kernel_size=conv2_kernel)
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)

        linear_input_dim = conv2_channel * pooled2_size ** 2
        self.fc1 = nn.Linear(in_features=linear_input_dim, out_features=fc1_size)
        self.fc2 = nn.Linear(in_features=fc1_size, out_features=fc2_size)
        self.fc3 = nn.Linear(in_features=fc2_size, out_features=num_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        x = self.maxpool(F.relu(self.conv1(x)))
        x = self.maxpool(F.relu(self.conv2(x)))

        # Collapse everything except the batch dimension
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Search space: candidate values for every LeNet hyperparameter
conv1_channel_list = [3, 6, 9]
conv1_kernel_list = [3, 5, 7]
conv2_channel_list = [12, 16, 20]
conv2_kernel_list = [3, 5, 7]
fc1_size_list = [64, 128, 256]
fc2_size_list = [32, 84, 120]

# Training configuration
lr = 0.01  # learning rate
momentum = 0.5
num_epoch = 5
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
limitation_model_size = 0.05  # upper bound on the fp32 model size, in MiB

# Best result over all candidates
best_model = None
best_model_info = ""
overall_best_accuracy = 0

# NOTE(review): the test split is used both to pick the best epoch of each
# candidate and to pick the best candidate, so the reported accuracy is
# optimistic; a held-out validation split would give an unbiased estimate.
print("Searching")
search_space = itertools.product(
    conv1_channel_list,
    conv1_kernel_list,
    conv2_channel_list,
    conv2_kernel_list,
    fc1_size_list,
    fc2_size_list,
)
for conv1_channel, conv1_kernel, conv2_channel, conv2_kernel, fc1_size, fc2_size in search_space:
    model = LeNet(conv1_channel, conv1_kernel, conv2_channel, conv2_kernel, fc1_size, fc2_size)
    fp32_model_size = get_model_size(model)
    # Skip candidates that violate the size constraint before spending any
    # device memory or training time on them.
    if fp32_model_size / MiB >= limitation_model_size:
        continue
    model = model.to(device=device)

    model_info = (
        f"conv1_channel:{conv1_channel}, conv1_kernel:{conv1_kernel}, "
        f"conv2_channel:{conv2_channel}, conv2_kernel:{conv2_kernel}, "
        f"fc1_size:{fc1_size}, fc2_size:{fc2_size}"
    )
    print(model_info)
    print(f"model has size={fp32_model_size/MiB:.2f} MiB")

    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # Start below any reachable accuracy so the first epoch always stores a
    # checkpoint, even when its accuracy is exactly 0.
    best_accuracy = float("-inf")
    best_checkpoint = {}
    for epoch in range(num_epoch):
        train(model, train_loader, criterion, optimizer)
        accuracy = evaluate(model, test_loader)
        if accuracy > best_accuracy:
            # Deep copy: state_dict() returns references to the live
            # parameters, which later epochs keep modifying.
            best_checkpoint['state_dict'] = copy.deepcopy(model.state_dict())
            best_accuracy = accuracy
        print(f'Epoch{epoch+1:>2d} Accuracy {accuracy:.2f}% / Best Accuracy: {best_accuracy:.2f}%')

    # Restore the best epoch (nothing to restore when num_epoch is 0)
    if 'state_dict' in best_checkpoint:
        model.load_state_dict(best_checkpoint['state_dict'])
    model_accuracy = evaluate(model, test_loader)
    print(f"Model has accuracy={model_accuracy:.2f}%")

    # Keep track of the best candidate seen so far
    if model_accuracy > overall_best_accuracy:
        overall_best_accuracy = model_accuracy
        best_model = model
        best_model_info = model_info

In [None]:
# Report the best architecture found by the search
if best_model is None:
    # Happens when no candidate met the size limit or none beat the initial
    # accuracy of 0.
    print("the search did not select any model")
else:
    print("the result of searching is " + best_model_info)
    print(f"the accuracy of best model is:{overall_best_accuracy:.2f}%")
