# Load Pretrained Model 

In [1]:
import os
import time

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from nni.compression.pytorch.speedup import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params

from mnist_model import Net, train, test, device, optimizer_scheduler_generator, trainer, calibration

# Load the pretrained model. torch.load unpickles the file, so only load
# checkpoints that come from a trusted source.
# map_location=device remaps the stored tensors onto the device used by
# train()/test(), so a checkpoint saved on a GPU still loads on a CPU-only host.
model = torch.load("mnist_cnn.pt", map_location=device)
model.eval()

# Show the model structure. Note that the quantizer applied later will wrap the model layers.
print(model)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


### Performance and statistics of pre-trained model 

In [2]:
## Performance
# Time a single test() call on the pretrained model. The resulting accuracy
# and duration are the baseline the quantized runs are compared against.
# perf_counter() is a monotonic, high-resolution clock, so the measured
# interval cannot be skewed by system clock adjustments.
start = time.perf_counter()
pre_best_acc = test(model, device)
pre_test_time = time.perf_counter() - start

print(f'Pretrained model Accuracy: {pre_best_acc: .2f}%, Test-time: {pre_test_time: .4f}s')


Test set: Average loss: 0.0267, Accuracy: 9919/10000 (99.19%)

Pretrained model Accuracy:  99.19%, Test-time:  1.6402s


# Quantizing Model with Observer Quantizer

## Configuration 1

In [3]:
# Quantization configuration: a single rule that applies 8-bit quantization
# to the inputs and weights of every Conv2d operator.
config_list = [
    dict(
        quant_types=['input', 'weight'],
        quant_bits=dict(input=8, weight=8),
        op_types=['Conv2d'],
    ),
]

In [4]:
 # Construct the ObserverQuantizer. Note that currently ObserverQuantizer only works in evaluation mode.
    
# Imported here, right before its first use in the notebook.
from nni.algorithms.compression.pytorch.quantization import ObserverQuantizer

# Build an optimizer/scheduler pair for the model; only the optimizer is passed to the quantizer.
optimizer, scheduler = optimizer_scheduler_generator(model)
# Attach the quantizer to the model using the rules in config_list.
quantizer = ObserverQuantizer(model, config_list, optimizer)
# Use the test data set to do calibration, this will not change the model parameters
calibration(model)
# obtain the quantization information and switch the model to "accuracy verification" mode
quantizer.compress()

In [5]:
## Fine-tune the model
# Run a few train/evaluate rounds and advance the learning-rate scheduler
# once per epoch.
# NOTE(review): a fresh optimizer/scheduler pair is created here instead of
# reusing the optimizer that was passed to the quantizer - confirm this is intended.
total_epoch = 3
optimizer, scheduler = optimizer_scheduler_generator(model)
for epoch in range(1, total_epoch + 1):
    train(model, device, optimizer=optimizer, epoch=epoch)
    test(model, device)
    scheduler.step()


Test set: Average loss: 0.0352, Accuracy: 9903/10000 (99.03%)


Test set: Average loss: 0.0309, Accuracy: 9915/10000 (99.15%)


Test set: Average loss: 0.0307, Accuracy: 9910/10000 (99.10%)



In [6]:
## Export the model and get calibration_config

model_path = "./log/mnist_model.pth"
calibration_path = "./log/mnist_calibration.pth"

# Create the output directories up front so the export also works from a
# fresh checkout where ./log does not exist yet.
for export_path in (model_path, calibration_path):
    os.makedirs(os.path.dirname(export_path), exist_ok=True)

# export_model writes both files and returns the calibration settings keyed
# by layer name.
calibration_config = quantizer.export_model(model_path, calibration_path)

print("calibration_config: ", calibration_config)

[2022-10-11 17:12:41] [32mModel state_dict saved to ./log/mnist_model.pth[0m
[2022-10-11 17:12:41] [32mMask dict saved to ./log/mnist_calibration.pth[0m
calibration_config:  {'conv1': {'weight_bits': 8, 'tracked_max_weight': 0.518154501914978, 'tracked_min_weight': -0.518154501914978, 'tracked_qmin_weight': -127, 'tracked_qmax_weight': 127, 'input_bits': 8, 'tracked_min_input': -0.40611180663108826, 'tracked_max_input': 2.8174006938934326, 'tracked_qmin_input': 0, 'tracked_qmax_input': 127}, 'conv2': {'weight_bits': 8, 'tracked_max_weight': 0.32586750388145447, 'tracked_min_weight': -0.32586750388145447, 'tracked_qmin_weight': -127, 'tracked_qmax_weight': 127, 'input_bits': 8, 'tracked_min_input': 0.0, 'tracked_max_input': 3.26343035697937, 'tracked_qmin_input': 0, 'tracked_qmax_input': 127}}


In [7]:
## Performance
# Time a single test() call on the quantized model and report the speed-up
# relative to pre_test_time, the duration measured for the pretrained model.
# perf_counter() is a monotonic clock, which is the appropriate choice for
# measuring elapsed intervals.
start = time.perf_counter()
best_acc = test(model, device)
test_time = time.perf_counter() - start

# The model evaluated here is the quantized one, so label it accordingly.
print(f'Quantized model Accuracy: {best_acc: .2f}%, Test-time: {test_time: .4f}s, Speed-up: {pre_test_time/test_time: .2f}x')


Test set: Average loss: 0.0299, Accuracy: 9908/10000 (99.08%)

Pretrained model Accuracy:  99.08%, Test-time:  1.4823s, Speed-up:  1.11x


## Configuration 2

In [8]:
# Reload the pretrained model from disk so Configuration 2 does not start from
# the model object that was quantized and fine-tuned above.
# torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location=device remaps the stored tensors onto the device used by
# train()/test(), so a checkpoint saved on a GPU still loads on a CPU-only host.
model = torch.load("mnist_cnn.pt", map_location=device)
model.eval()

# Two rules selected by layer name: the convolution layers quantize weights and
# inputs, while the linear layers fc1/fc2 additionally quantize their outputs.
# All quantized tensors use 8 bits.
config_list = [{
    'quant_types': ['weight', 'input'],
    'quant_bits': {'weight': 8, 'input': 8},
    'op_names': ['conv1', 'conv2']
}, {
    'quant_types': ['output', 'weight', 'input'],
    'quant_bits': {'output': 8, 'weight': 8, 'input': 8},
    'op_names': ['fc1', 'fc2'],
}]

In [9]:
 # Construct the ObserverQuantizer. Note that currently ObserverQuantizer only works in evaluation mode.
    
# Same import as in the Configuration 1 section; repeating it is harmless.
from nni.algorithms.compression.pytorch.quantization import ObserverQuantizer

# Build an optimizer/scheduler pair for the reloaded model; only the optimizer is passed to the quantizer.
optimizer, scheduler = optimizer_scheduler_generator(model)
# Attach a new quantizer to the model using the Configuration 2 rules in config_list.
quantizer = ObserverQuantizer(model, config_list, optimizer)
# Use the test data set to do calibration, this will not change the model parameters
calibration(model)
# obtain the quantization information and switch the model to "accuracy verification" mode
quantizer.compress()

In [10]:
## Fine-tune the model
# Run a few train/evaluate rounds and advance the learning-rate scheduler
# once per epoch.
# NOTE(review): a fresh optimizer/scheduler pair is created here instead of
# reusing the optimizer that was passed to the quantizer - confirm this is intended.
total_epoch = 3
optimizer, scheduler = optimizer_scheduler_generator(model)
for epoch in range(1, total_epoch + 1):
    train(model, device, optimizer=optimizer, epoch=epoch)
    test(model, device)
    scheduler.step()


Test set: Average loss: 0.0272, Accuracy: 9918/10000 (99.18%)


Test set: Average loss: 0.0272, Accuracy: 9918/10000 (99.18%)


Test set: Average loss: 0.0272, Accuracy: 9918/10000 (99.18%)



In [11]:
## Export the model and get calibration_config

# NOTE: these are the same paths used by the Configuration 1 export, so the
# files written there are overwritten by this cell.
model_path = "./log/mnist_model.pth"
calibration_path = "./log/mnist_calibration.pth"

# Create the output directories up front so the export also works from a
# fresh checkout where ./log does not exist yet.
for export_path in (model_path, calibration_path):
    os.makedirs(os.path.dirname(export_path), exist_ok=True)

# export_model writes both files and returns the calibration settings keyed
# by layer name.
calibration_config = quantizer.export_model(model_path, calibration_path)

print("calibration_config: ", calibration_config)

[2022-10-11 17:14:28] [32mModel state_dict saved to ./log/mnist_model.pth[0m
[2022-10-11 17:14:28] [32mMask dict saved to ./log/mnist_calibration.pth[0m
calibration_config:  {'conv1': {'weight_bits': 8, 'tracked_max_weight': 0.518154501914978, 'tracked_min_weight': -0.518154501914978, 'tracked_qmin_weight': -127, 'tracked_qmax_weight': 127, 'input_bits': 8, 'tracked_min_input': -0.40611180663108826, 'tracked_max_input': 2.8174006938934326, 'tracked_qmin_input': 0, 'tracked_qmax_input': 127}, 'conv2': {'weight_bits': 8, 'tracked_max_weight': 0.32586750388145447, 'tracked_min_weight': -0.32586750388145447, 'tracked_qmin_weight': -127, 'tracked_qmax_weight': 127, 'input_bits': 8, 'tracked_min_input': 0.0, 'tracked_max_input': 3.2396812438964844, 'tracked_qmin_input': 0, 'tracked_qmax_input': 127}, 'fc1': {'weight_bits': 8, 'tracked_max_weight': 0.17832523584365845, 'tracked_min_weight': -0.17832523584365845, 'tracked_qmin_weight': -127, 'tracked_qmax_weight': 127, 'input_bits': 8, 'tr

In [12]:
## Performance
# Time a single test() call on the quantized model and report the speed-up
# relative to pre_test_time, the duration measured for the pretrained model.
# perf_counter() is a monotonic clock, which is the appropriate choice for
# measuring elapsed intervals.
start = time.perf_counter()
best_acc = test(model, device)
test_time = time.perf_counter() - start

# The model evaluated here is the quantized one, so label it accordingly.
print(f'Quantized model Accuracy: {best_acc: .2f}%, Test-time: {test_time: .4f}s, Speed-up: {pre_test_time/test_time: .2f}x')


Test set: Average loss: 0.0266, Accuracy: 9919/10000 (99.19%)

Pretrained model Accuracy:  99.19%, Test-time:  1.5184s, Speed-up:  1.08x
