# Load Pretrained Model 

In [1]:
import os
import time

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from nni.algorithms.compression.pytorch.quantization import QAT_Quantizer
from nni.compression.pytorch.speedup import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params

from mnist_model import Net, train, test, device, optimizer_scheduler_generator, trainer, test_trt

# Load the pretrained model.
# NOTE: torch.load unpickles the checkpoint, which can execute arbitrary code;
# only load "mnist_cnn.pt" from a trusted source.
# map_location puts the model on the same `device` that is later passed to
# test()/train(), so a checkpoint saved on a different device still loads.
model = torch.load("mnist_cnn.pt", map_location=device)
model.eval()

# Show the model structure; the quantizer will later wrap some of these layers.
print(model)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


### Performance and statistics of pre-trained model 

In [2]:
## Performance of the pretrained (unquantized) model
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() is wall-clock time and can jump.
start = time.perf_counter()
pre_best_acc = test(model, device)
# Kept as the baseline: later cells divide by it to report the speed-up.
pre_test_time = time.perf_counter() - start

print(f'Pretrained model Accuracy: {pre_best_acc: .2f}%, Test-time: {pre_test_time: .4f}s')


Test set: Average loss: 0.0267, Accuracy: 9919/10000 (99.19%)

Pretrained model Accuracy:  99.19%, Test-time:  1.5021s


# Quantizing the Model with the QAT Quantizer

## Configuration 1

In [3]:
# Configuration 1: quantize the inputs and weights of Conv2d ops to 8 bits.
config_list = [
    dict(
        quant_types=['input', 'weight'],
        quant_bits=dict(input=8, weight=8),
        op_types=['Conv2d'],
    ),
]

In [4]:
## Quantization
# QAT_Quantizer is imported in the notebook's top import cell.
# Random placeholder batch passed to the quantizer as its dummy_input.
dummy_input = torch.rand(3, 1, 28, 28).to(device)
# The optimizer is handed to the quantizer at construction time; the scheduler
# returned alongside it drives the learning-rate steps during finetuning.
optimizer, scheduler = optimizer_scheduler_generator(model)
quantizer = QAT_Quantizer(model, config_list, optimizer, dummy_input)
# Last expression of the cell, so the returned (wrapped) model is displayed.
quantizer.compress()

Net(
  (conv1): QuantizerModuleWrapper(
    (module): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  )
  (conv2): QuantizerModuleWrapper(
    (module): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  )
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [5]:
## Finetune the model
total_epoch = 3
# Reuse the `optimizer`/`scheduler` pair created in the quantization cell.
# That optimizer is the instance QAT_Quantizer was constructed with (NNI patches
# it to count optimization steps); building a fresh optimizer here would bypass
# that patched instance.
for epoch in range(1, total_epoch + 1):
    train(model, device, optimizer=optimizer, epoch=epoch)
    test(model, device)
    scheduler.step()


Test set: Average loss: 0.0387, Accuracy: 9871/10000 (98.71%)


Test set: Average loss: 0.0372, Accuracy: 9899/10000 (98.99%)


Test set: Average loss: 0.0302, Accuracy: 9908/10000 (99.08%)



In [6]:
## Export the model and get calibration_config

model_path = "./log/mnist_model.pth"
calibration_path = "./log/mnist_calibration.pth"
# Create the target directories up front so the export does not fail when
# "./log" does not exist yet (e.g. on a fresh checkout).
for export_path in (model_path, calibration_path):
    os.makedirs(os.path.dirname(export_path), exist_ok=True)
calibration_config = quantizer.export_model(model_path, calibration_path)

print("calibration_config: ", calibration_config)

[2022-10-11 16:47:41] [32mModel state_dict saved to ./log/mnist_model.pth[0m
[2022-10-11 16:47:41] [32mMask dict saved to ./log/mnist_calibration.pth[0m
calibration_config:  {'conv1': {'weight_bits': 8, 'weight_scale': tensor([0.0036], device='cuda:0'), 'weight_zero_point': tensor([135.], device='cuda:0'), 'input_bits': 8, 'tracked_min_input': -0.4242129623889923, 'tracked_max_input': 2.821486711502075}, 'conv2': {'weight_bits': 8, 'weight_scale': tensor([0.0029], device='cuda:0'), 'weight_zero_point': tensor([130.], device='cuda:0'), 'input_bits': 8, 'tracked_min_input': 0.0, 'tracked_max_input': 2.627883195877075}}


In [12]:
## Performance of the quantized model (Configuration 1)
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() is wall-clock time and can jump.
start = time.perf_counter()
best_acc = test(model, device)
test_time = time.perf_counter() - start

# The label says "Quantized": `model` is the finetuned, quantizer-wrapped model
# here, not the pretrained baseline. Speed-up is relative to `pre_test_time`.
# NOTE(review): the model is still evaluated through the quantizer wrappers in
# PyTorch, so this ratio presumably reflects wrapper overhead rather than real
# low-precision inference - confirm.
print(f'Quantized model Accuracy: {best_acc: .2f}%, Test-time: {test_time: .4f}s, Speed-up: {pre_test_time/test_time: .2f}x')


Test set: Average loss: 0.0301, Accuracy: 9908/10000 (99.08%)

Pretrained model Accuracy:  99.08%, Test-time:  1.5405s, Speed-up:  0.98x


## Configuration 2

In [18]:
# Reload the pretrained model so Configuration 2 starts from unquantized weights.
# NOTE: torch.load unpickles the checkpoint, which can execute arbitrary code;
# only load "mnist_cnn.pt" from a trusted source.
# map_location puts the model on the same `device` that is later passed to
# test()/train(), so a checkpoint saved on a different device still loads.
model = torch.load("mnist_cnn.pt", map_location=device)
model.eval()

# Configuration 2: layers are selected by name instead of by op type.
# - conv1/conv2: 8-bit weights and inputs.
# - fc1/fc2: 8-bit weights, inputs and outputs.
configure_list = [{
    'quant_types': ['weight', 'input'],
    'quant_bits': {'weight': 8, 'input': 8},
    'op_names': ['conv1', 'conv2']
}, {
    'quant_types': ['output', 'weight', 'input'],
    'quant_bits': {'output': 8, 'weight': 8, 'input': 8},
    'op_names': ['fc1', 'fc2'],
}]

In [19]:
## Quantization
# QAT_Quantizer is imported in the notebook's top import cell.
# Random placeholder batch passed to the quantizer as its dummy_input.
dummy_input = torch.rand(3, 1, 28, 28).to(device)
# The optimizer is handed to the quantizer at construction time; the scheduler
# returned alongside it drives the learning-rate steps during finetuning.
optimizer, scheduler = optimizer_scheduler_generator(model)
quantizer = QAT_Quantizer(model, configure_list, optimizer, dummy_input)
# Last expression of the cell, so the returned (wrapped) model is displayed.
quantizer.compress()

Net(
  (conv1): QuantizerModuleWrapper(
    (module): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  )
  (conv2): QuantizerModuleWrapper(
    (module): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  )
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): QuantizerModuleWrapper(
    (module): Linear(in_features=9216, out_features=128, bias=True)
  )
  (fc2): QuantizerModuleWrapper(
    (module): Linear(in_features=128, out_features=10, bias=True)
  )
)

In [20]:
## Finetune the model
total_epoch = 3
# Reuse the `optimizer`/`scheduler` pair created in the quantization cell.
# That optimizer is the instance QAT_Quantizer was constructed with (NNI patches
# it to count optimization steps); building a fresh optimizer here would bypass
# that patched instance.
for epoch in range(1, total_epoch + 1):
    train(model, device, optimizer=optimizer, epoch=epoch)
    test(model, device)
    scheduler.step()


Test set: Average loss: 0.0425, Accuracy: 9872/10000 (98.72%)


Test set: Average loss: 0.0343, Accuracy: 9895/10000 (98.95%)


Test set: Average loss: 0.0277, Accuracy: 9919/10000 (99.19%)



In [21]:
## Export the model and get calibration_config

# NOTE: these are the same paths used by the Configuration 1 export, so this
# export overwrites those files.
model_path = "./log/mnist_model.pth"
calibration_path = "./log/mnist_calibration.pth"
# Create the target directories up front so the export does not fail when
# "./log" does not exist yet (e.g. on a fresh checkout).
for export_path in (model_path, calibration_path):
    os.makedirs(os.path.dirname(export_path), exist_ok=True)
calibration_config = quantizer.export_model(model_path, calibration_path)

print("calibration_config: ", calibration_config)

[2022-10-11 16:58:34] [32mModel state_dict saved to ./log/mnist_model.pth[0m
[2022-10-11 16:58:34] [32mMask dict saved to ./log/mnist_calibration.pth[0m
calibration_config:  {'conv1': {'weight_bits': 8, 'weight_scale': tensor([0.0038], device='cuda:0'), 'weight_zero_point': tensor([138.], device='cuda:0'), 'input_bits': 8, 'tracked_min_input': -0.4242129623889923, 'tracked_max_input': 2.821486711502075}, 'conv2': {'weight_bits': 8, 'weight_scale': tensor([0.0028], device='cuda:0'), 'weight_zero_point': tensor([140.], device='cuda:0'), 'input_bits': 8, 'tracked_min_input': 0.0, 'tracked_max_input': 2.588933229446411}, 'fc1': {'weight_bits': 8, 'weight_scale': tensor([0.0014], device='cuda:0'), 'weight_zero_point': tensor([122.], device='cuda:0'), 'input_bits': 8, 'tracked_min_input': 0.0, 'tracked_max_input': 4.223904609680176, 'output_bits': 8, 'tracked_min_output': -9.732779502868652, 'tracked_max_output': 8.64108657836914}, 'fc2': {'weight_bits': 8, 'weight_scale': tensor([0.0033

In [22]:
## Performance of the quantized model (Configuration 2)
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed intervals; time.time() is wall-clock time and can jump.
start = time.perf_counter()
best_acc = test(model, device)
test_time = time.perf_counter() - start

# The label says "Quantized": `model` is the finetuned, quantizer-wrapped model
# here, not the pretrained baseline. Speed-up is relative to `pre_test_time`.
# NOTE(review): the model is still evaluated through the quantizer wrappers in
# PyTorch, so this ratio presumably reflects wrapper overhead rather than real
# low-precision inference - confirm.
print(f'Quantized model Accuracy: {best_acc: .2f}%, Test-time: {test_time: .4f}s, Speed-up: {pre_test_time/test_time: .2f}x')


Test set: Average loss: 0.0275, Accuracy: 9921/10000 (99.21%)

Pretrained model Accuracy:  99.21%, Test-time:  1.5821s, Speed-up:  0.95x
