## Example ADVANCED mode recipe - normalization layer extra parameters tuning by SLaNC


In [2]:
import warnings
warnings.filterwarnings("ignore")


1. Instantiate a `torch` model from its source, in this case the Hugging Face Hub.

In [3]:
from transformers import pipeline
from transformers import CLIPModel
# Checkpoint identifier and pipeline task used throughout this notebook.
model = "openai/clip-vit-base-patch32"
task = "zero-shot-image-classification"

# Each entry holds the keyword arguments of one pipeline call: an image URL
# together with the candidate labels to score against that image.
task_cases = [
    {
        "images": "http://images.cocodataset.org/val2017/000000039769.jpg",
        "candidate_labels": [
            "a photo of cats",
            "a photo of dogs",
        ],
    },
    {
        "images": "http://images.cocodataset.org/val2017/000000397133.jpg",
        "candidate_labels": [
            "a kitchen scene",
            "a living room scene",
        ],
    },
]

pipe = pipeline(task=task, model=model, device_map="auto")

# Run every case once on the freshly loaded model; the resulting list is the
# cell's displayed output.
[pipe(**case) for case in task_cases]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Device set to use cuda:0


[[{'score': 0.9988459348678589, 'label': 'a photo of cats'},
  {'score': 0.0011540568666532636, 'label': 'a photo of dogs'}],
 [{'score': 0.9962789416313171, 'label': 'a kitchen scene'},
  {'score': 0.0037210225127637386, 'label': 'a living room scene'}]]

2. Transform into `DmxModel`; this does not change the functional behavior.

In [4]:
from dmx.compressor import DmxModel

# Replace the pipeline's model with the DmxModel built from it.
pipe.model = DmxModel.from_torch(pipe.model)

# Re-run the same cases on the wrapped model; the list is the cell's output.
[pipe(**case) for case in task_cases]

[[{'score': 0.9988459348678589, 'label': 'a photo of cats'},
  {'score': 0.0011540524428710341, 'label': 'a photo of dogs'}],
 [{'score': 0.9962789416313171, 'label': 'a kitchen scene'},
  {'score': 0.003721001325175166, 'label': 'a living room scene'}]]

3. Configure to BASIC mode; this should bring in all VSIMD approximations with default config.

In [5]:
# Switch the wrapped model to BASIC mode.
pipe.model.to_basic_mode()

# Re-run the same cases in BASIC mode; the list is the cell's output.
[pipe(**case) for case in task_cases]

[[{'score': 0.843508780002594, 'label': 'a photo of cats'},
  {'score': 0.1564912348985672, 'label': 'a photo of dogs'}],
 [{'score': 0.938322126865387, 'label': 'a kitchen scene'},
  {'score': 0.061677876859903336, 'label': 'a living room scene'}]]

4. Calibrate the `LayerNorm` instances with SLaNC.

In [6]:
from dmx.compressor import nn
from dmx.compressor.advanced_recipe import (
    DmxSLaNCHyperparams,
    DmxSLaNCRecipe,
)

def get_clip_slanc_layers(model):
    """Build the SLaNC hyperparameter map for a CLIP model.

    Every key of the returned dict is a layer-norm module and every value is
    the ``DmxSLaNCHyperparams`` assigned to it. Within each encoder stack
    (text, then vision):

    * ``layer_norm1`` of block ``i > 0`` is tagged ``"post_mlp"`` and refers to
      the ``mlp`` and the ``layer_norm2.weight`` of block ``i - 1``;
    * ``layer_norm2`` of every block is tagged ``"post_attn"`` and refers to
      the ``self_attn`` and the ``layer_norm1.weight`` of the same block.

    The text tower's ``final_layer_norm`` and the vision tower's
    ``post_layernorm`` are tagged ``"post_mlp"`` and refer to the last encoder
    block of their tower.

    Args:
        model: Object exposing ``class_for_deserialization``, ``text_model``,
            ``vision_model`` and ``_gm``.

    Returns:
        dict: Layer-norm module -> ``DmxSLaNCHyperparams``.

    Raises:
        AssertionError: If ``model.class_for_deserialization`` is not
            ``CLIPModel`` or the two encoder stacks differ in length.
    """
    assert model.class_for_deserialization == CLIPModel
    depth = len(model.text_model.encoder.layers)
    # Both stacks are walked with the same range, so they must be equally deep.
    assert depth == len(model.vision_model.encoder.layers)

    slanc_hp = {}
    encoder_stacks = (
        model._gm.text_model.encoder.layers,
        model._gm.vision_model.encoder.layers,
    )
    for stack in encoder_stacks:
        preceding = None
        for position_in_stack in range(depth):
            block = stack.get_submodule(str(position_in_stack))
            # The first block's layer_norm1 gets no entry, which keeps it at
            # its default scale.
            if preceding is not None:
                slanc_hp[block.layer_norm1] = DmxSLaNCHyperparams(
                    position="post_mlp",
                    prev_layer=preceding.mlp,
                    prev_ln_weight=preceding.layer_norm2.weight,
                )
            slanc_hp[block.layer_norm2] = DmxSLaNCHyperparams(
                position="post_attn",
                prev_layer=block.self_attn,
                prev_ln_weight=block.layer_norm1.weight,
            )
            preceding = block

    # Special cases: the norm applied after the last encoder block of each tower.
    # NOTE(review): the last block is read through `model.<tower>` here, whereas
    # the loop above reads blocks through `model._gm.<tower>` -- confirm both
    # paths resolve to the same modules.
    closing_norms = (
        (model._gm.text_model.final_layer_norm, model.text_model),
        (model._gm.vision_model.post_layernorm, model.vision_model),
    )
    for closing_norm, tower in closing_norms:
        tail_block = tower.encoder.layers[-1]
        slanc_hp[closing_norm] = DmxSLaNCHyperparams(
            position="post_mlp",
            prev_layer=tail_block.mlp,
            prev_ln_weight=tail_block.layer_norm2.weight,
        )

    return slanc_hp

def hp_gen(_model) -> dict:
    """Return the SLaNC hyperparameter map for ``_model``.

    Dispatches on ``_model.class_for_deserialization``; only ``CLIPModel`` is
    handled, via ``get_clip_slanc_layers``.

    Args:
        _model: Object exposing ``class_for_deserialization``.

    Returns:
        dict: The mapping produced by ``get_clip_slanc_layers(_model)``.

    Raises:
        ValueError: If the model class is not ``CLIPModel``.
    """
    # Reject unsupported model classes up front.
    if not (_model.class_for_deserialization == CLIPModel):
        raise ValueError(f'Unknown model class for extracting SLANC layers: {_model.class_for_deserialization}')
    return get_clip_slanc_layers(_model)
    

# Apply the SLaNC recipe, driven by `hp_gen`, to the pipeline's model. The
# message is printed from inside the recipe's context.
with DmxSLaNCRecipe(hp_gen).applied_to(pipe.model):
    print("SLaNC done!")

# Re-run the same cases after the recipe; the list is the cell's output.
[pipe(**case) for case in task_cases]

SLaNC done!


[[{'score': 0.8301814794540405, 'label': 'a photo of cats'},
  {'score': 0.16981850564479828, 'label': 'a photo of dogs'}],
 [{'score': 0.9074352979660034, 'label': 'a kitchen scene'},
  {'score': 0.092564657330513, 'label': 'a living room scene'}]]