## Example ADVANCED mode recipe - normalization layer extra parameters tuning by SLaNC


In [1]:
import warnings
# Install a blanket "ignore" filter: every Python warning raised after this
# cell is suppressed (presumably to keep the cell outputs below uncluttered).
# NOTE(review): this also hides deprecation warnings from the libraries used here.
warnings.filterwarnings("ignore")


1. Instantiate a `torch` model from source, HF hub in this case.

In [2]:
from transformers import pipeline
from transformers import CLIPModel
# Checkpoint and task handed to the Hugging Face pipeline factory below.
model = "openai/clip-vit-base-patch32"
task = "zero-shot-image-classification"

# Each entry holds the keyword arguments for one pipeline call:
# an image URL and the labels to score it against.
task_cases = [
    {
        "images": "http://images.cocodataset.org/val2017/000000039769.jpg",
        "candidate_labels": ["a photo of cats", "a photo of dogs"],
    },
    {
        "images": "http://images.cocodataset.org/val2017/000000397133.jpg",
        "candidate_labels": ["a kitchen scene", "a living room scene"],
    },
]

pipe = pipeline(task=task, model=model, device_map="auto")

# -------------------------------------------------------------------------------
# Reference scores from the unmodified model; later cells re-run the same cases.
[pipe(**case) for case in task_cases]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


[[{'score': 0.9988455772399902, 'label': 'a photo of cats'},
  {'score': 0.0011544461594894528, 'label': 'a photo of dogs'}],
 [{'score': 0.9962840676307678, 'label': 'a kitchen scene'},
  {'score': 0.0037159069906920195, 'label': 'a living room scene'}]]

2. Transform into `DmxModel`; this does not change the functional behavior.

In [3]:
from dmx.compressor import DmxModel

# Swap the pipeline's model for its DmxModel counterpart, built from the
# model the pipeline currently holds.
pipe.model = DmxModel.from_torch(pipe.model)

# -------------------------------------------------------------------------------
# Re-score the same cases for comparison with the reference run.
[pipe(**case) for case in task_cases]

[[{'score': 0.9988455772399902, 'label': 'a photo of cats'},
  {'score': 0.0011544550070539117, 'label': 'a photo of dogs'}],
 [{'score': 0.9962841868400574, 'label': 'a kitchen scene'},
  {'score': 0.0037158718332648277, 'label': 'a living room scene'}]]

3. Configure to BASIC mode; this should bring in all VSIMD approximations with default config.

In [4]:
# Put the DmxModel into BASIC mode (default-config approximations, per the
# step description for this cell).
pipe.model.to_basic_mode()

# -------------------------------------------------------------------------------
# Re-score the same cases to see how BASIC mode shifts the probabilities.
[pipe(**case) for case in task_cases]

[[{'score': 0.9199306964874268, 'label': 'a photo of cats'},
  {'score': 0.08006926625967026, 'label': 'a photo of dogs'}],
 [{'score': 0.9805154800415039, 'label': 'a kitchen scene'},
  {'score': 0.019484540447592735, 'label': 'a living room scene'}]]

4. Calibrate the `LayerNorm` instances with SLaNC.

In [12]:
from dmx.compressor import nn
import re
from dmx.compressor.advanced_recipe import (
    DmxSLaNCHyperparams,
    DmxSLaNCRecipe,
)

def get_clip_slanc_layers(model):
    """Build the SLaNC hyperparameter map for the normalization layers of a CLIP model.

    Walks the text and vision encoder stacks under ``model._gm`` and assigns one
    ``DmxSLaNCHyperparams`` to every ``layer_norm1`` / ``layer_norm2``, then adds
    the three norms that sit outside the stacks (``vision_model.pre_layrnorm``,
    ``text_model.final_layer_norm`` and ``vision_model.post_layernorm``).

    Positions used:
      * ``"first"``     -- ``layer_norm1`` of layer 0 and ``pre_layrnorm``;
        only the device is passed.
      * ``"post_attn"`` -- every ``layer_norm2``; receives the same layer's
        ``layer_norm1`` and the attention ``v_proj`` / ``out_proj``.
      * ``"post_mlp"``  -- ``layer_norm1`` of layers 1 and up, plus the two
        closing norms; receives the preceding layer's ``layer_norm2`` and its
        MLP ``fc1`` / ``fc2``.

    Each encoder stack is iterated with its own depth, so the text and vision
    towers are not required to have the same number of layers.

    Args:
        model: object whose ``class_for_deserialization`` is ``CLIPModel`` and
            which exposes ``text_model`` / ``vision_model`` both directly and
            under ``_gm``.

    Returns:
        dict mapping each normalization submodule to its ``DmxSLaNCHyperparams``.
        Insertion order is: text stack, vision stack, then the three norms
        outside the stacks.

    Raises:
        AssertionError: if ``model.class_for_deserialization`` is not ``CLIPModel``.
    """
    assert model.class_for_deserialization == CLIPModel

    def _first(norm):
        # Norm with no upstream modules passed: only its device is supplied.
        return DmxSLaNCHyperparams(position="first", device=norm.weight.device)

    def _post_mlp(norm, prev_layer):
        # Norm that follows the MLP of ``prev_layer``.
        # NOTE(review): ``prev_ln_weight`` is given the norm module itself, not
        # its ``.weight`` tensor, exactly as before -- confirm against the
        # DmxSLaNCHyperparams contract.
        return DmxSLaNCHyperparams(
            position="post_mlp",
            mlp_type="standard",
            device=norm.weight.device,
            prev_ln_weight=prev_layer.layer_norm2,
            fc1=prev_layer.mlp.fc1,
            fc2=prev_layer.mlp.fc2,
        )

    def _post_attn(layer):
        # ``layer_norm2`` follows the self-attention block of the same layer.
        return DmxSLaNCHyperparams(
            position="post_attn",
            device=layer.layer_norm2.weight.device,
            prev_ln_weight=layer.layer_norm1,
            v_proj=layer.self_attn.v_proj,
            o_proj=layer.self_attn.out_proj,
        )

    _hp = {}

    # Pair every ``_gm`` stack with the depth of its own tower instead of
    # asserting that both towers share a single depth.
    stacks = (
        (model._gm.text_model.encoder.layers, len(model.text_model.encoder.layers)),
        (model._gm.vision_model.encoder.layers, len(model.vision_model.encoder.layers)),
    )
    for layer_stack, n_layers in stacks:
        prev_layer = None
        for idx in range(n_layers):
            layer = layer_stack.get_submodule(str(idx))
            if prev_layer is None:
                # Keep the first layer norm in the stack at default scale.
                _hp[layer.layer_norm1] = _first(layer.layer_norm1)
            else:
                _hp[layer.layer_norm1] = _post_mlp(layer.layer_norm1, prev_layer)
            _hp[layer.layer_norm2] = _post_attn(layer)
            prev_layer = layer

    # Special cases: norms that live outside the encoder stacks.
    # The last encoder layer is looked up through ``model.<tower>`` rather than
    # ``model._gm.<tower>``, matching how these entries were built originally.
    text_gm = model._gm.text_model
    vision_gm = model._gm.vision_model
    _hp[vision_gm.pre_layrnorm] = _first(vision_gm.pre_layrnorm)
    _hp[text_gm.final_layer_norm] = _post_mlp(
        text_gm.final_layer_norm, model.text_model.encoder.layers[-1]
    )
    _hp[vision_gm.post_layernorm] = _post_mlp(
        vision_gm.post_layernorm, model.vision_model.encoder.layers[-1]
    )
    return _hp


def hp_gen(_model) -> dict:
    """Return the SLaNC hyperparameter map for ``_model``.

    Dispatches on ``_model.class_for_deserialization``; ``CLIPModel`` is the
    only class handled and is delegated to ``get_clip_slanc_layers``.

    Raises:
        ValueError: for any other model class.
    """
    if _model.class_for_deserialization == CLIPModel:
        return get_clip_slanc_layers(_model)
    # No extractor exists for this class: fail loudly.
    raise ValueError(f'Unknown model class for extracting SLANC layers: {_model.class_for_deserialization}')
    

# Run the SLaNC recipe against the pipeline's model. The body only prints a
# marker; the calibration itself is driven by `applied_to`
# (presumably on entering the context -- confirm).
with DmxSLaNCRecipe(hp_gen).applied_to(pipe.model):
    print("SLaNC done!")

# -------------------------------------------------------------------------------
# Re-score the same cases with the calibrated normalization layers.
[pipe(**case) for case in task_cases]

SLaNC done!


[[{'score': 0.9986134767532349, 'label': 'a photo of cats'},
  {'score': 0.0013864935608580709, 'label': 'a photo of dogs'}],
 [{'score': 0.9974502921104431, 'label': 'a kitchen scene'},
  {'score': 0.002549670170992613, 'label': 'a living room scene'}]]

In [13]:
from dmx.compressor.modeling import DmxModule

# Take the first graph module registered on the DmxModel.
complete_gm = list(pipe.model._gms.values())[0]

# Collect every (qualified name, module) pair that is a DmxModule.
named_dmx_modules = [
    (name, module)
    for name, module in complete_gm.named_modules()
    if isinstance(module, DmxModule)
]

# Report the approximator's extra parameters for the LayerNorm modules only.
for _n, _m in named_dmx_modules:
    if not isinstance(_m, nn.LayerNorm):
        continue
    print(f"Name: {_n}, norm: {_m.approximator.function.extra_params}")

Name: vision_model.pre_layrnorm, norm: {'norm': 1.0}
Name: vision_model.encoder.layers.0.layer_norm1, norm: {'norm': 1.0}
Name: vision_model.encoder.layers.0.layer_norm2, norm: {'norm': tensor(0.0564, device='cuda:0')}
Name: vision_model.encoder.layers.1.layer_norm1, norm: {'norm': tensor(0.0298, device='cuda:0')}
Name: vision_model.encoder.layers.1.layer_norm2, norm: {'norm': tensor(0.0383, device='cuda:0')}
Name: vision_model.encoder.layers.2.layer_norm1, norm: {'norm': tensor(0.1633, device='cuda:0')}
Name: vision_model.encoder.layers.2.layer_norm2, norm: {'norm': tensor(0.0413, device='cuda:0')}
Name: vision_model.encoder.layers.3.layer_norm1, norm: {'norm': tensor(0.2988, device='cuda:0')}
Name: vision_model.encoder.layers.3.layer_norm2, norm: {'norm': tensor(0.0334, device='cuda:0')}
Name: vision_model.encoder.layers.4.layer_norm1, norm: {'norm': tensor(0.2830, device='cuda:0')}
Name: vision_model.encoder.layers.4.layer_norm2, norm: {'norm': tensor(0.0310, device='cuda:0')}
Name: