# Model Quantization
## Quantization Aware Training using Intel Neural Compressor

In [1]:
import os

os.environ["LOGLEVEL"] = "CRITICAL"
import lightning as L
import torch
from lightning.pytorch import seed_everything
from lightning.pytorch.callbacks import EarlyStopping, RichModelSummary, RichProgressBar
from neural_compressor.config import QuantizationAwareTrainingConfig
from neural_compressor.training import fit, prepare_compression

from dataset import PetDataModule
from model import PetClassifier
from utils import print_size_of_model

In [2]:
# Print the versions of the imported packages so the run can be reproduced.
%reload_ext watermark
%watermark --iversions

torch    : 2.1.0
lightning: 2.1.1



In [3]:
# Notebook-wide configuration.
# CHECKPOINT is the checkpoint file loaded later for the baseline comparison.
# It can be overridden with the PET_CHECKPOINT environment variable so the
# notebook is not tied to one machine's home directory; the default keeps the
# original location.
CHECKPOINT = os.environ.get(
    "PET_CHECKPOINT",
    "/home/ababu/mlw_2023/checkpoints/epoch=8-step=720-v1.ckpt",
)
BATCH_SIZE = 32
NUM_WORKERS = 8
# Device used by the evaluation Trainer and the baseline model.
DEVICE = "cpu"
SEED = 42
# Seed the random number generators (workers=True is forwarded to Lightning).
seed_everything(SEED, workers=True)

Seed set to 42


42

In [4]:
# Data module shared by the QAT training run and the evaluation run below.
dm = PetDataModule(BATCH_SIZE, NUM_WORKERS)

In [5]:
# Ask PyTorch to flush denormal floats to zero on CPU; the call returns True
# when the mode is supported and was enabled.
torch.set_flush_denormal(True)

True

In [6]:
def train_func(model_n):
    """Train the global ``base_model`` after swapping in ``model_n``.

    ``model_n`` is assigned to ``base_model.model`` and the module is then
    fitted on the global data module ``dm`` with a GPU ``Trainer`` that uses a
    rich progress bar and early stopping on ``val_loss`` (patience 5).

    Args:
        model_n: The network to install as ``base_model.model``.

    Returns:
        The global ``base_model`` after fitting.
    """
    base_model.model = model_n
    callbacks = [RichProgressBar(), EarlyStopping("val_loss", patience=5)]
    qat_trainer = L.Trainer(accelerator="gpu", callbacks=callbacks)
    qat_trainer.fit(base_model, datamodule=dm)
    return base_model

In [7]:
# Quantization-aware-training configuration; no arguments are passed, so the
# library defaults apply.
conf = QuantizationAwareTrainingConfig()

# Values passed positionally to PetClassifier below.
lr = 1e-4
weight_decay = 1e-6
num_classes = 37
# NOTE(review): this builds a new PetClassifier and does not load CHECKPOINT;
# confirm that starting QAT from this instance (rather than the trained
# checkpoint) is intended.
base_model = PetClassifier(num_classes, lr, weight_decay)

# QAT flow: prepare the model, signal training start, train the model exposed
# by the compression manager, signal training end, then save the result.
compression_manager = prepare_compression(base_model, conf)
compression_manager.callbacks.on_train_begin()
qmodel = compression_manager.model
# train_func installs qmodel as base_model.model and fits base_model.
train_func(qmodel)
compression_manager.callbacks.on_train_end()
# Persist the compression manager's output under ./qat_output.
compression_manager.save("./qat_output")

  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,
Trainer will use only 1 of 3 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=3)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/ababu/mlw_2023/.venv1/lib/python3.10/site-packages/lightning/pytorch/loops/utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2]


Output()

In [8]:
# Separate Trainer for evaluation: it runs on DEVICE (set to "cpu" in the
# config cell) instead of the GPU used inside train_func.
bar = RichProgressBar()
summary = RichModelSummary()
trainer = L.Trainer(accelerator=DEVICE, callbacks=[bar, summary])

Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.rich_model_summary.RichModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/home/ababu/mlw_2023/.venv1/lib/python3.10/site-packages/lightning/pytorch/trainer/setup.py:187: GPU available but not used. You can set it by doing `Trainer(accelerator='gpu')`.


In [9]:
# Swap the inner network held by qmodel into base_model, then evaluate it on
# the test split with the evaluation Trainer.
base_model.model = qmodel.model
trainer.test(model=base_model, datamodule=dm)

Output()

[{'test_acc_epoch': 0.492324560880661}]

In [10]:
%%timeit
# Time one forward pass of base_model on a random (1, 3, 224, 224) float32
# input with gradients disabled. The measured time also includes eval() and
# the creation of the random input, since both are inside the timed body.
base_model.eval()
with torch.no_grad():
    base_model(torch.rand((1, 3, 224, 224), dtype=torch.float32))

20.2 ms ± 1.1 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [11]:
# Report the size of the network currently installed as base_model.model.
# NOTE(review): the baseline cell measures the whole `model` object rather
# than its `.model` attribute; confirm the two sizes are comparable.
print_size_of_model(base_model.model)

In [13]:
# Load the checkpoint as the baseline for the size and latency comparison.
# The target device comes from the DEVICE constant (the same one the evaluation
# Trainer uses) instead of a second hard-coded "cpu" literal, and map_location
# restores the checkpoint tensors directly onto that device.
device = torch.device(DEVICE)
model = PetClassifier.load_from_checkpoint(CHECKPOINT, map_location=device)
# Explicit move kept so the model is guaranteed to live on `device`.
model.to(device)
print_size_of_model(model)

In [15]:
%%timeit
# Time one forward pass of the checkpoint-loaded model on a random
# (1, 3, 224, 224) float32 input with gradients disabled. The measured time
# also includes eval() and the creation of the random input, since both are
# inside the timed body.
model.eval()
with torch.no_grad():
    model(torch.rand((1, 3, 224, 224), dtype=torch.float32))

25.6 ms ± 471 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
