# Load Pretrained Model 

In [1]:
import os
import time

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from nni.compression.pytorch.speedup import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params

from mnist_model import Net, train, test, device, optimizer_scheduler_generator, trainer, test_trt

# Load the pretrained model (the checkpoint stores the whole pickled module,
# not just a state_dict). `map_location` puts the weights on the device used
# by the rest of the notebook, so a checkpoint saved on a GPU still loads on a
# CPU-only machine.
# NOTE: torch.load unpickles arbitrary objects; only load trusted checkpoints.
model = torch.load("mnist_cnn.pt", map_location=device)
model.eval()

# Show the model structure before any compression wrapper is applied.
print(model)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


### Performance and statistics of pre-trained model 

In [2]:
# Time one full evaluation pass. perf_counter() is monotonic, so the measured
# interval cannot be distorted by system clock adjustments.
start = time.perf_counter()

pre_best_acc = test(model, device)
pre_test_time = time.perf_counter() - start

# FLOPs / parameter counts are collected with a dummy batch of three
# single-channel 28x28 inputs. These `pre_*` values are the baseline that the
# later comparison cells read.
pre_flops, pre_params, _ = count_flops_params(model, torch.randn([3, 1, 28, 28]).to(device))
print(f'Pretrained model FLOPs {pre_flops/1e6:.2f} M, #Params: {pre_params/1e6:.2f}M, Accuracy: {pre_best_acc: .2f}%, Test-time: {pre_test_time: .4f}s')


Test set: Average loss: 0.0267, Accuracy: 9919/10000 (99.19%)

+-------+-------+--------+----------------+-----------------+-----------------+----------+---------+
| Index | Name  |  Type  |  Weight Shape  |    Input Size   |   Output Size   |  FLOPs   | #Params |
+-------+-------+--------+----------------+-----------------+-----------------+----------+---------+
|   0   | conv1 | Conv2d | (32, 1, 3, 3)  |  (3, 1, 28, 28) | (3, 32, 26, 26) |  194688  |   320   |
|   1   | conv2 | Conv2d | (64, 32, 3, 3) | (3, 32, 26, 26) | (3, 64, 24, 24) | 10616832 |  18496  |
|   2   | fc1   | Linear |  (128, 9216)   |    (3, 9216)    |     (3, 128)    | 1179648  | 1179776 |
|   3   | fc2   | Linear |   (10, 128)    |     (3, 128)    |     (3, 10)     |   1280   |   1290  |
+-------+-------+--------+----------------+-----------------+-----------------+----------+---------+
FLOPs total: 11992448
#Params total: 1199882
Pretrained model FLOPs 11.99 M, #Params: 1.20M, Accuracy:  99.19%, Test-time:  1.51

# Quantizing the Model with QAT Quantizer

## Configuration 1

In [3]:
# Quantization configuration: 8-bit inputs and weights for every Conv2d layer.
_quantized_tensors = ['input', 'weight']
config_list = [
    dict(
        quant_types=list(_quantized_tensors),
        quant_bits={tensor_kind: 8 for tensor_kind in _quantized_tensors},
        op_types=['Conv2d'],
    )
]

In [4]:
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
# Dummy batch (three single-channel 28x28 inputs) handed to the quantizer.
dummy_input = torch.rand(3, 1, 28, 28).to(device)
# The optimizer is created first because it is passed into QAT_Quantizer below.
optimizer, scheduler = optimizer_scheduler_generator(model)
# Set up quantization-aware training for the layers selected by config_list.
quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
# The recorded cell output shows conv1/conv2 wrapped in QuantizerModuleWrapper
# after this call.
quantizer.compress()

Net(
  (conv1): QuantizerModuleWrapper(
    (module): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  )
  (conv2): QuantizerModuleWrapper(
    (module): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  )
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [5]:
## Finetune the model
# Reuse the `optimizer` / `scheduler` created in the cell that built the
# quantizer. QAT_Quantizer was constructed with that optimizer instance; per
# the NNI documentation it patches the optimizer to count optimization steps,
# so building a fresh optimizer here would bypass that bookkeeping.
total_epoch = 3
for epoch in range(1, total_epoch + 1):
    train(model, device, optimizer=optimizer, epoch=epoch)
    test(model, device)
    scheduler.step()


Test set: Average loss: 0.0340, Accuracy: 9903/10000 (99.03%)


Test set: Average loss: 0.0364, Accuracy: 9905/10000 (99.05%)


Test set: Average loss: 0.0302, Accuracy: 9906/10000 (99.06%)



In [6]:
## export model and get calibration_config

model_path = "./log/mnist_model.pth"
calibration_path = "./log/mnist_calibration.pth"

# Make sure the output directories exist so the export does not fail on a
# fresh checkout where ./log has not been created yet.
for export_path in (model_path, calibration_path):
    os.makedirs(os.path.dirname(export_path), exist_ok=True)

# export_model writes both files and returns the calibration configuration
# that is printed below.
calibration_config = quantizer.export_model(model_path, calibration_path)

print("calibration_config: ", calibration_config)

[2022-10-11 14:57:02] [32mModel state_dict saved to ./log/mnist_model.pth[0m
[2022-10-11 14:57:02] [32mMask dict saved to ./log/mnist_calibration.pth[0m
calibration_config:  {'conv1': {'weight_bits': 8, 'weight_scale': tensor([0.0037], device='cuda:0'), 'weight_zero_point': tensor([142.], device='cuda:0'), 'input_bits': 8, 'tracked_min_input': -0.4242129623889923, 'tracked_max_input': 2.821486711502075}, 'conv2': {'weight_bits': 8, 'weight_scale': tensor([0.0030], device='cuda:0'), 'weight_zero_point': tensor([138.], device='cuda:0'), 'input_bits': 8, 'tracked_min_input': 0.0, 'tracked_max_input': 2.997408151626587}}


## build tensorRT engine to make a real speedup
from nni.compression.pytorch.quantization_speedup import ModelSpeedupTensorRT
# Shape passed to ModelSpeedupTensorRT as the model input shape.
# NOTE(review): the leading dimension here is 1 while batchsize=64 is passed
# below — confirm whether the two are expected to match.
input_shape = (1, 1, 28, 28)
# The calibration_config returned by quantizer.export_model is passed as `config`.
engine = ModelSpeedupTensorRT(model, input_shape, config=calibration_config, batchsize=64)
engine.compress()
# Run the test_trt helper (imported from mnist_model) on the built engine.
test_trt(engine)

from nni.compression.pytorch.pruning import ADMMPruner
from nni.compression.pytorch.pruning import ActivationMeanRankPruner
from nni.compression.pytorch.speedup import ModelSpeedup
import nni

import torch.nn.functional as F

def pruner_function(config_list, model_path="mnist_cnn.pt", finetune_epochs=3):
    """Prune the pretrained model with ADMM, speed it up and fine-tune it.

    Parameters
    ----------
    config_list : list of dict
        NNI pruning configuration handed to ``ADMMPruner``.
    model_path : str, optional
        Path of the checkpoint holding the pickled pretrained model.
    finetune_epochs : int, optional
        Number of fine-tuning epochs run on the compacted model.

    Returns
    -------
    torch.nn.Module
        The model after mask generation, speedup and fine-tuning.
    """
    # A fresh copy is loaded on every call so each configuration starts from
    # the same pretrained weights. `map_location` keeps the load working when
    # the checkpoint was saved on a different device.
    model = torch.load(model_path, map_location=device)
    model.eval()

    traced_optimizer = nni.trace(optim.Adadelta)(model.parameters(), lr=1.0)
    criterion = F.nll_loss

    # Using ADMMPruner to prune the model and generate the masks.
    pruner = ADMMPruner(
        model, config_list, trainer, traced_optimizer, criterion,
        iterations=5, training_epochs=1, granularity='coarse-grained',
    )

    # Alternative pruner kept for reference:
    # pruner = ActivationMeanRankPruner(model, config_list, trainer, traced_optimizer, criterion, training_batches=20)

    # compress the model and generate the masks
    _, masks = pruner.compress()

    # Show the masks sparsity. mask.sum() / numel() is the fraction of weights
    # that are KEPT, so the sparsity (fraction pruned) is one minus that ratio.
    print("Showing the masks sparsity")
    for name, mask in masks.items():
        weight_mask = mask['weight']
        kept_ratio = float(weight_mask.sum()) / weight_mask.numel()
        print(name, ' sparsity : ', '{:.2}'.format(1.0 - kept_ratio))

    # need to unwrap the model, if the model is wrapped before speedup
    pruner._unwrap_model()

    # speedup the model using the generated masks
    ModelSpeedup(model, torch.rand(3, 1, 28, 28).to(device), masks).speedup_model()

    # The optimizer is created only after speedup, from the compacted model.
    optimizer, scheduler = optimizer_scheduler_generator(model)

    # fine-tune and evaluate the compacted model
    for epoch in range(1, finetune_epochs + 1):
        train(model, device, optimizer=optimizer, epoch=epoch)
        test(model, device)
        scheduler.step()

    return model

def Perfomance_function(model):
    """Print structure, accuracy, cost and timing of a compressed model.

    Evaluates ``model`` on the test set, counts its FLOPs and parameters, and
    prints them next to the pretrained baseline. The baseline numbers are read
    from the notebook-level variables ``pre_flops``, ``pre_params``,
    ``pre_best_acc`` and ``pre_test_time``, so the cell that computes them must
    have been run first.

    Parameters
    ----------
    model : torch.nn.Module
        The model to evaluate.

    Returns
    -------
    None
    """
    print("Model after speedup")
    print(model)

    # perf_counter() is monotonic, so the interval is not affected by system
    # clock adjustments.
    start = time.perf_counter()
    best_acc = test(model, device)
    test_time = time.perf_counter() - start

    flops, params, _ = count_flops_params(model, torch.randn([3, 1, 28, 28]).to(device))

    # Baseline line (the stray ", ," of the original message is removed).
    print(f'Pretrained model FLOPs {pre_flops/1e6:.2f} M, #Params: {pre_params/1e6:.2f}M, Accuracy: {pre_best_acc: .2f}%, Test-time: {pre_test_time: .4f}s')
    print(f'Finetuned model FLOPs {flops/1e6:.2f} M, #Params: {params/1e6:.2f}M, Accuracy: {best_acc: .2f}%, Test-time: {test_time: .4f}s, Speed-up: {pre_test_time/test_time: .2f}x')

## ADMM Configuration 1

# Prune every Linear and Conv2d layer to 50% sparsity, leaving the final
# classifier layer `fc2` untouched.
config_list = [
    {'sparsity_per_layer': 0.50, 'op_types': ['Linear', 'Conv2d']},
    {'exclude': True, 'op_names': ['fc2']},
]

# Prune + fine-tune, then report the result against the pretrained baseline.
pruned_model = pruner_function(config_list=config_list)
Perfomance_function(pruned_model)

## ADMM Configuration 2

# Different sparsity budgets per layer type: 50% over the Conv2d layers and
# 80% over the Linear layers, with the final classifier `fc2` excluded.
# 'Linear' is a layer TYPE, so it must be selected through 'op_types';
# 'op_names' expects module names such as 'fc1' / 'fc2'.
config_list = [{
    'op_types': ['Conv2d'],
    'total_sparsity': 0.5
}, {
    'op_types': ['Linear'],
    'total_sparsity': 0.8
}, {
    'exclude': True,
    'op_names': ['fc2']
}]


pruned_model = pruner_function(config_list=config_list)

Perfomance_function(pruned_model)