# Load Pretrained Model 

In [1]:
import os
import time

import torch
import torch.optim as optim
from nni.compression.pytorch.speedup import ModelSpeedup
from nni.compression.pytorch.utils import count_flops_params
from torch.optim.lr_scheduler import StepLR

from mnist_model import Net, train, test, device, optimizer_scheduler_generator, trainer, calibration

# Load the pretrained model.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location places the stored tensors on `device` (imported from mnist_model), so a
# checkpoint that was saved on a GPU also loads on a CPU-only machine.
# NOTE(review): assumes `device` is a torch.device or device string - confirm in mnist_model.
model = torch.load("mnist_cnn.pt", map_location=device)
model.eval()

# Show the model structure; note that the quantizer will later wrap the model layers.
print(model)

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


### Performance and statistics of pre-trained model 

In [2]:
## Performance
# Time one evaluation pass of the unmodified model. The result is the baseline that the
# quantized runs are compared against later in the notebook.
# perf_counter() is a monotonic, high-resolution clock, so the measured interval cannot be
# skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
pre_best_acc = test(model, device)
pre_test_time = time.perf_counter() - start

print(f'Pretrained model Accuracy: {pre_best_acc: .2f}%, Test-time: {pre_test_time: .4f}s')


Test set: Average loss: 0.0267, Accuracy: 9919/10000 (99.19%)

Pretrained model Accuracy:  99.19%, Test-time:  1.6945s


# Quantizing the Model with the DoReFa Quantizer

## Configuration 1

In [5]:
# Configuration 1: quantize only the weights of every Conv2d layer, using 8 bits.
config_list = [
    {
        'quant_types': ['weight'],
        'quant_bits': {'weight': 8},
        'op_types': ['Conv2d'],
    },
]

In [6]:
 # Construct the DoReFaQuantizer.
from nni.algorithms.compression.pytorch.quantization import DoReFaQuantizer

# The quantizer takes the optimizer at construction time, so build the optimizer first.
optimizer, scheduler = optimizer_scheduler_generator(model)
quantizer = DoReFaQuantizer(
    model=model,
    config_list=config_list,
    optimizer=optimizer,
)
# compress() stays the last expression so the cell displays the wrapped model.
quantizer.compress()

Net(
  (conv1): QuantizerModuleWrapper(
    (module): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  )
  (conv2): QuantizerModuleWrapper(
    (module): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  )
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [7]:
## Fine-tune the model
# Train the quantizer-wrapped model for a few epochs and evaluate after each one.
total_epoch = 3
# NOTE(review): this builds a fresh optimizer/scheduler pair rather than reusing the
# optimizer that was handed to the quantizer - confirm that this is intended.
optimizer, scheduler = optimizer_scheduler_generator(model)
for epoch in range(1, total_epoch + 1):
    train(model, device, optimizer=optimizer, epoch=epoch)
    test(model, device)
    scheduler.step()


Test set: Average loss: 0.0608, Accuracy: 9866/10000 (98.66%)


Test set: Average loss: 0.0508, Accuracy: 9882/10000 (98.82%)


Test set: Average loss: 0.0518, Accuracy: 9880/10000 (98.80%)



In [8]:
## Export the model and get calibration_config
model_path = "./log/mnist_model.pth"
calibration_path = "./log/mnist_calibration.pth"

# Create the output directory up front so the export also works on a fresh checkout
# where ./log does not exist yet.
for export_path in (model_path, calibration_path):
    os.makedirs(os.path.dirname(export_path), exist_ok=True)

calibration_config = quantizer.export_model(model_path, calibration_path)

print("calibration_config: ", calibration_config)

[2022-10-11 17:44:01] [32mModel state_dict saved to ./log/mnist_model.pth[0m
[2022-10-11 17:44:01] [32mMask dict saved to ./log/mnist_calibration.pth[0m
calibration_config:  {'conv1': {'weight_bits': 8}, 'conv2': {'weight_bits': 8}}


In [9]:
## Performance
# Re-time evaluation after fine-tuning and compare it with the pretrained baseline
# (pre_test_time, measured in the earlier performance cell).
# NOTE(review): the layers are only wrapped by QuantizerModuleWrapper at this point, so this
# presumably still runs the original float kernels and a ratio near 1x is expected - confirm.
start = time.perf_counter()
best_acc = test(model, device)
test_time = time.perf_counter() - start

# `model` is the fine-tuned, quantizer-wrapped network here, so label it as quantized.
print(f'Quantized model Accuracy: {best_acc: .2f}%, Test-time: {test_time: .4f}s, Speed-up: {pre_test_time/test_time: .2f}x')


Test set: Average loss: 0.0518, Accuracy: 9880/10000 (98.80%)

Pretrained model Accuracy:  98.80%, Test-time:  1.6796s, Speed-up:  1.01x


## Configuration 2

In [10]:
# Reload the pretrained model so Configuration 2 starts from the original checkpoint
# instead of the network that was wrapped and fine-tuned under Configuration 1.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location places the stored tensors on `device`, so a checkpoint that was saved on a
# GPU also loads on a CPU-only machine.
# NOTE(review): assumes `device` is a torch.device or device string - confirm in mnist_model.
model = torch.load("mnist_cnn.pt", map_location=device)
model.eval()

# Configuration 2: 8-bit weight quantization for every Conv2d layer (selected by type)
# and for the two fully connected layers (selected by name).
config_list = [
    {
        'op_types': ['Conv2d'],
        'quant_types': ['weight'],
        'quant_bits': {'weight': 8},
    },
    {
        'op_names': ['fc1', 'fc2'],
        'quant_types': ['weight'],
        'quant_bits': {'weight': 8},
    },
]

In [11]:
 # Construct the DoReFaQuantizer.
from nni.algorithms.compression.pytorch.quantization import DoReFaQuantizer

# The quantizer takes the optimizer at construction time, so build the optimizer first.
optimizer, scheduler = optimizer_scheduler_generator(model)
quantizer = DoReFaQuantizer(
    model=model,
    config_list=config_list,
    optimizer=optimizer,
)
# compress() stays the last expression so the cell displays the wrapped model.
quantizer.compress()

Net(
  (conv1): QuantizerModuleWrapper(
    (module): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  )
  (conv2): QuantizerModuleWrapper(
    (module): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  )
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): QuantizerModuleWrapper(
    (module): Linear(in_features=9216, out_features=128, bias=True)
  )
  (fc2): QuantizerModuleWrapper(
    (module): Linear(in_features=128, out_features=10, bias=True)
  )
)

In [12]:
## Fine-tune the model
# Train the quantizer-wrapped model for a few epochs and evaluate after each one.
total_epoch = 3
# NOTE(review): this builds a fresh optimizer/scheduler pair rather than reusing the
# optimizer that was handed to the quantizer - confirm that this is intended.
optimizer, scheduler = optimizer_scheduler_generator(model)
for epoch in range(1, total_epoch + 1):
    train(model, device, optimizer=optimizer, epoch=epoch)
    test(model, device)
    scheduler.step()


Test set: Average loss: 0.3281, Accuracy: 8821/10000 (88.21%)


Test set: Average loss: 0.1447, Accuracy: 9819/10000 (98.19%)


Test set: Average loss: 0.1627, Accuracy: 9851/10000 (98.51%)



In [13]:
## Export the model and get calibration_config
# NOTE: these are the same paths that the Configuration 1 export cell writes to, so this
# export overwrites those files.
model_path = "./log/mnist_model.pth"
calibration_path = "./log/mnist_calibration.pth"

# Create the output directory up front so the export also works on a fresh checkout
# where ./log does not exist yet.
for export_path in (model_path, calibration_path):
    os.makedirs(os.path.dirname(export_path), exist_ok=True)

calibration_config = quantizer.export_model(model_path, calibration_path)

print("calibration_config: ", calibration_config)

[2022-10-11 17:46:10] [32mModel state_dict saved to ./log/mnist_model.pth[0m
[2022-10-11 17:46:10] [32mMask dict saved to ./log/mnist_calibration.pth[0m
calibration_config:  {'conv1': {'weight_bits': 8}, 'conv2': {'weight_bits': 8}, 'fc1': {'weight_bits': 8}, 'fc2': {'weight_bits': 8}}


In [14]:
## Performance
# Re-time evaluation after fine-tuning and compare it with the pretrained baseline
# (pre_test_time, measured in the earlier performance cell).
# NOTE(review): the layers are only wrapped by QuantizerModuleWrapper at this point, so this
# presumably still runs the original float kernels and a ratio near 1x is expected - confirm.
start = time.perf_counter()
best_acc = test(model, device)
test_time = time.perf_counter() - start

# `model` is the fine-tuned, quantizer-wrapped network here, so label it as quantized.
print(f'Quantized model Accuracy: {best_acc: .2f}%, Test-time: {test_time: .4f}s, Speed-up: {pre_test_time/test_time: .2f}x')


Test set: Average loss: 0.1627, Accuracy: 9851/10000 (98.51%)

Pretrained model Accuracy:  98.51%, Test-time:  1.5880s, Speed-up:  1.07x
