# nni模型压缩

## 创建模型 并进行预训练

In [1]:
import torch
import torch.nn.functional as F
from torch.optim import SGD

from scripts.compression_mnist_model import TorchModel, trainer, evaluator, device

# define the model
model = TorchModel().to(device)

# show the model structure, note that pruner will wrap the model layer.
print(model)

TorchModel(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (relu1): ReLU()
  (relu2): ReLU()
  (relu3): ReLU()
  (relu4): ReLU()
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
)


In [2]:
# define the optimizer and criterion for pre-training

optimizer = SGD(model.parameters(), 1e-2)
criterion = F.nll_loss

# pre-train and evaluate the model on MNIST dataset
for epoch in range(3):
    trainer(model, optimizer, criterion)
    evaluator(model)

Average test loss: 0.8338, Accuracy: 8061/10000 (81%)
Average test loss: 0.2623, Accuracy: 9190/10000 (92%)
Average test loss: 0.1851, Accuracy: 9429/10000 (94%)


### 原始模型速度测试

In [3]:
import time
start = time.time()
model(torch.rand(128, 1, 28, 28).to(device))
print('Original Model - Elapsed Time : ', time.time() - start)

Original Model - Elapsed Time :  0.0036008358001708984


## 模型剪枝
·使用 L1NormPruner 对模型进行剪枝 并 生成掩码
·两个输入参数:
    config_list : 设置修剪类型
    model : 待修剪的模型

In [4]:
config_list = [{
    'sparsity_per_layer': 0.5,
    'op_types': ['Linear', 'Conv2d']
}, {
    'exclude': True,
    'op_names': ['fc3']
}]

In [5]:
from nni.compression.pytorch.pruning import L1NormPruner
pruner = L1NormPruner(model, config_list)

# show the wrapped model structure, `PrunerModuleWrapper` have wrapped the layers that configured in the config_list.

In [6]:
# compress the model and generate the masks
_, masks = pruner.compress()
# show the masks sparsity
for name, mask in masks.items():
    print(name, ' sparsity : ', '{:.2}'.format(mask['weight'].sum() / mask['weight'].numel()))

conv1  sparsity :  0.5
conv2  sparsity :  0.5
fc1  sparsity :  0.5
fc2  sparsity :  0.5


### 输出的掩码 masks 并未真正的减小模型，只是置零，参数仍需计算
需要 nni 的 ModelSpeedup 使模型真正变小

In [7]:
# need to unwrap the model, if the model is wrapped before speedup
pruner._unwrap_model()

# speedup the model
from nni.compression.pytorch.speedup import ModelSpeedup

ModelSpeedup(model, torch.rand(3, 1, 28, 28).to(device), masks).speedup_model()

[2023-01-31 10:10:40] [32mstart to speedup the model[0m
[2023-01-31 10:10:41] [32minfer module masks...[0m
[2023-01-31 10:10:41] [32mUpdate mask for conv1[0m
[2023-01-31 10:10:41] [32mUpdate mask for relu1[0m
[2023-01-31 10:10:41] [32mUpdate mask for pool1[0m
[2023-01-31 10:10:41] [32mUpdate mask for conv2[0m
[2023-01-31 10:10:41] [32mUpdate mask for relu2[0m
[2023-01-31 10:10:41] [32mUpdate mask for pool2[0m
[2023-01-31 10:10:41] [32mUpdate mask for .aten::flatten.11[0m
[2023-01-31 10:10:41] [32mUpdate mask for fc1[0m
[2023-01-31 10:10:41] [32mUpdate mask for relu3[0m
[2023-01-31 10:10:41] [32mUpdate mask for fc2[0m
[2023-01-31 10:10:41] [32mUpdate mask for relu4[0m
[2023-01-31 10:10:41] [32mUpdate mask for fc3[0m
[2023-01-31 10:10:41] [32mUpdate mask for .aten::log_softmax.12[0m
[2023-01-31 10:10:41] [32mUpdate the indirect sparsity for the .aten::log_softmax.12[0m
[2023-01-31 10:10:41] [32mUpdate the indirect sparsity for the fc3[0m
[2023-01-31 10:1

  if last_output.grad is not None and tin.grad is not None:
  elif last_output.grad is None:


In [8]:
print(model)

TorchModel(
  (conv1): Conv2d(1, 3, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=128, out_features=60, bias=True)
  (fc2): Linear(in_features=60, out_features=42, bias=True)
  (fc3): Linear(in_features=42, out_features=10, bias=True)
  (relu1): ReLU()
  (relu2): ReLU()
  (relu3): ReLU()
  (relu4): ReLU()
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
)


### 加速后速度测试

In [9]:
start = time.time()
model(torch.rand(128, 1, 28, 28).to(device))
print('Speedup Model - Elapsed Time : ', time.time() - start)

Speedup Model - Elapsed Time :  0.003857135772705078


## 微调剪枝后的模型
微调模型前，需要重新生成 optimizer，（加速过程中进行了层替换，原来的 optimizer 已不适用新模型）

In [10]:
optimizer = SGD(model.parameters(), 1e-2)
for epoch in range(3):
    trainer(model, optimizer, criterion)

In [11]:
start = time.time()
model(torch.rand(128, 1, 28, 28).to(device))
print('Speedup Model - Elapsed Time : ', time.time() - start)

Speedup Model - Elapsed Time :  0.003574848175048828
