## Example ADVANCED mode recipe - SmoothQuant, calibration & GPTQ


In [1]:
import warnings
# Install a global "ignore" filter so Python warnings are not printed by later cells.
# NOTE(review): this also hides deprecation warnings from the libraries used below.
warnings.filterwarnings("ignore")


1. Instantiate a `torch` model from source, HF hub in this case.

In [2]:
from transformers import pipeline

# Checkpoint identifier and the pipeline task it is evaluated on.
model = "openai/clip-vit-base-patch32"
task = "zero-shot-image-classification"

# Each entry holds the keyword arguments of one pipeline call:
# an image URL plus the candidate labels to score against it.
task_cases = [
    {
        "images": "http://images.cocodataset.org/val2017/000000039769.jpg",
        "candidate_labels": ["a photo of cats", "a photo of dogs"],
    },
    {
        "images": "http://images.cocodataset.org/val2017/000000397133.jpg",
        "candidate_labels": ["a kitchen scene", "a living room scene"],
    },
]

pipe = pipeline(task=task, model=model, device_map="auto")

# -------------------------------------------------------------------------------
# Baseline scores: run every task case through the freshly built pipeline.
[pipe(**_kwargs) for _kwargs in task_cases]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


[[{'score': 0.9988459348678589, 'label': 'a photo of cats'},
  {'score': 0.0011540568666532636, 'label': 'a photo of dogs'}],
 [{'score': 0.9962789416313171, 'label': 'a kitchen scene'},
  {'score': 0.0037210225127637386, 'label': 'a living room scene'}]]

2. Transform into `DmxModel`; this does not change the functional behavior.

In [3]:
from dmx.compressor import DmxModel

# Replace the pipeline's model with the result of `DmxModel.from_torch`;
# the surrounding pipeline object (pre/post-processing) is reused as is.
# NOTE(review): this cell reassigns `pipe.model` from itself, so re-running it
# calls `from_torch` on an already transformed model — confirm that is safe.
pipe.model = DmxModel.from_torch(pipe.model)

# -------------------------------------------------------------------------------
# Re-run all task cases so the scores can be compared with the previous cell.
[pipe(**_tc) for _tc in task_cases]

[[{'score': 0.9988459348678589, 'label': 'a photo of cats'},
  {'score': 0.0011540546547621489, 'label': 'a photo of dogs'}],
 [{'score': 0.9962789416313171, 'label': 'a kitchen scene'},
  {'score': 0.003721001325175166, 'label': 'a living room scene'}]]

3. Apply `DmxSmoothQuantRecipe`; this does not change functional behavior but makes subsequent quantization easier.

In [4]:
from dmx.compressor import nn

from dmx.compressor.advanced_recipe import (
    DmxModuleSmoothQuantHyperparams,
    DmxSmoothQuantRecipe,
)


def hp_gen(_model) -> dict:
    """Build the per-module SmoothQuant hyperparameter map for ``_model``.

    Args:
        _model: Object exposing ``named_dmx_modules()``, which yields
            ``(name, module)`` pairs.

    Returns:
        A dict keyed by module object. Every module that is an instance of
        ``nn.Linear`` or ``nn.Conv2d`` maps to its own
        ``DmxModuleSmoothQuantHyperparams`` built with
        ``migration_strength=0.25`` and ``fuse_to_weight=True``; all other
        modules are omitted.
    """
    hyperparams = {}
    for _name, module in _model.named_dmx_modules():
        # Only the selected layer types receive hyperparameters.
        if not isinstance(module, (nn.Linear, nn.Conv2d)):
            continue
        hyperparams[module] = DmxModuleSmoothQuantHyperparams(
            migration_strength=0.25,
            fuse_to_weight=True,
        )
    return hyperparams


# Run a single pipeline call on the first task case while the recipe is applied to the model.
# NOTE(review): presumably the recipe collects activation statistics during this
# call and finalizes on exit — confirm against the recipe's documentation.
with DmxSmoothQuantRecipe(hp_gen).applied_to(pipe.model):
    pipe(**task_cases[0])

# -------------------------------------------------------------------------------
# Re-run all task cases after the recipe block has exited.
[pipe(**_tc) for _tc in task_cases]

[[{'score': 0.9988459348678589, 'label': 'a photo of cats'},
  {'score': 0.0011540590785443783, 'label': 'a photo of dogs'}],
 [{'score': 0.9962789416313171, 'label': 'a kitchen scene'},
  {'score': 0.0037210651207715273, 'label': 'a living room scene'}]]

4. Quantization; this, without subsequent calibration, should degrade accuracy. 

In [5]:
from dmx.compressor import DmxConfigRule, format, nn

# Rule for nn.Linear modules: INT8 input format, INT4 weight format.
DmxConfigRule(
    module_types=(nn.Linear,),
    module_config={
        "input_formats": [format.INT8],
        "weight_format": format.INT4,
    },
).apply_to(pipe.model)

# Rule for nn.ActActMatMul modules: two input formats, both INT8.
DmxConfigRule(
    module_types=(nn.ActActMatMul,),
    module_config={"input_formats": [format.INT8, format.INT8]},
).apply_to(pipe.model)

# -------------------------------------------------------------------------------
# Re-run all task cases with the configuration rules applied.
[pipe(**_kwargs) for _kwargs in task_cases]

[[{'score': 0.5, 'label': 'a photo of cats'},
  {'score': 0.5, 'label': 'a photo of dogs'}],
 [{'score': 0.5, 'label': 'a kitchen scene'},
  {'score': 0.5, 'label': 'a living room scene'}]]

5. Calibration; this should restore some accuracy. 

In [6]:
from dmx.compressor.advanced_recipe import (
    DmxQuantizerCalibrationHyperparams,
    DmxModuleQuantizerCalibrationHyperparams,
    DmxQuantizerCalibrationRecipe,
)


def hp_gen(_model) -> dict:
    """Build the per-module quantizer-calibration hyperparameter map.

    Note: this redefines the ``hp_gen`` from the earlier SmoothQuant cell.

    Args:
        _model: Object exposing ``named_dmx_modules()``, which yields
            ``(name, module)`` pairs. It is iterated twice.

    Returns:
        A dict keyed by module object. ``nn.Linear`` / ``nn.Conv2d`` modules
        get calibration hyperparameters for their ``"input_cast"`` input and
        their weight; ``nn.ActActMatMul`` modules get them for their
        ``"input_cast"`` and ``"multiplier_cast"`` inputs. Entries from the
        second group take precedence if a key appears in both.
    """
    # Pass 1: modules with a weight -> calibrate the input cast and the weight.
    weighted = {}
    for _name, module in _model.named_dmx_modules():
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            weighted[module] = DmxModuleQuantizerCalibrationHyperparams(
                inputs={"input_cast": DmxQuantizerCalibrationHyperparams()},
                weight=DmxQuantizerCalibrationHyperparams(),
            )

    # Pass 2: ActActMatMul modules -> calibrate the two named input casts.
    matmuls = {}
    for _name, module in _model.named_dmx_modules():
        if isinstance(module, nn.ActActMatMul):
            matmuls[module] = DmxModuleQuantizerCalibrationHyperparams(
                inputs={
                    "input_cast": DmxQuantizerCalibrationHyperparams(),
                    "multiplier_cast": DmxQuantizerCalibrationHyperparams(),
                },
            )

    return weighted | matmuls


# The calibration recipe is applied only for the duration of this single pipeline call.
with DmxQuantizerCalibrationRecipe(hp_gen).applied_to(pipe.model):
    pipe(**task_cases[0])  # calibrate on the first task case example

# -------------------------------------------------------------------------------
# Re-run all task cases; note the second case was not used for calibration.
[pipe(**_tc) for _tc in task_cases]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


[[{'score': 0.9793279767036438, 'label': 'a photo of cats'},
  {'score': 0.02067197673022747, 'label': 'a photo of dogs'}],
 [{'score': 0.7676579356193542, 'label': 'a kitchen scene'},
  {'score': 0.23234213888645172, 'label': 'a living room scene'}]]

6. GPTQ; this should restore some more accuracy on top of calibration.

In [7]:
from dmx.compressor.advanced_recipe import (
    DmxModuleGPTQHyperparams,
    DmxGPTQRecipe,
)


def hp_gen(_model) -> dict:
    """Build the per-module GPTQ hyperparameter map for ``_model``.

    Note: this redefines the ``hp_gen`` from the earlier cells.

    Args:
        _model: Object exposing ``named_dmx_modules()``, which yields
            ``(name, module)`` pairs.

    Returns:
        A dict keyed by module object, mapping every ``nn.Linear`` or
        ``nn.Conv2d`` module to a default-constructed
        ``DmxModuleGPTQHyperparams``; all other modules are omitted.
    """
    hyperparams = {}
    for _name, module in _model.named_dmx_modules():
        if isinstance(module, (nn.Linear, nn.Conv2d)):
            hyperparams[module] = DmxModuleGPTQHyperparams()
    return hyperparams

# The GPTQ recipe is applied only for the duration of this single pipeline call.
with DmxGPTQRecipe(hp_gen).applied_to(pipe.model):
    pipe(**task_cases[0])  # calibrate on the first task case example

# -------------------------------------------------------------------------------
# Re-run all task cases to compare against the calibration-only scores.
[pipe(**_tc) for _tc in task_cases]

[[{'score': 0.9814556241035461, 'label': 'a photo of cats'},
  {'score': 0.018544413149356842, 'label': 'a photo of dogs'}],
 [{'score': 0.8038136959075928, 'label': 'a kitchen scene'},
  {'score': 0.19618624448776245, 'label': 'a living room scene'}]]