## Example ADVANCED mode recipe: tuning normalization-layer extra parameters with SLaNC


In [1]:
import warnings
warnings.filterwarnings("ignore")

import torch
import sys
import copy
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from datasets import load_dataset
from evaluate import load
from dmx.compressor.modeling import DmxModel

def normalize(processor, text):
    """Normalize a transcript so predictions and references compare fairly.

    Args:
        processor: object expected to expose ``tokenizer.normalize(text)``.
        text: the transcript string to normalize.

    Returns:
        The result of ``processor.tokenizer.normalize(text)`` when that call
        succeeds; otherwise ``text`` lower-cased with surrounding whitespace
        stripped.
    """
    try:
        return processor.tokenizer.normalize(text)
    except Exception:
        # Deliberate best-effort fallback for processors without a usable
        # normalizer. `Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate so a run can still be aborted.
        return text.lower().strip()

def run_evaluation(pipe, dataset_list, processor, wer_metric, eval_name, log_every=1):
    """Transcribe every sample, normalize the texts and compute the WER.

    Args:
        pipe: callable invoked as ``pipe(audio, return_timestamps=True)`` that
            returns a mapping with a ``"text"`` entry.
        dataset_list: sized iterable of samples; each sample provides
            ``sample["audio"]["array"]`` and ``sample["text"]``.
        processor: forwarded to ``normalize`` for text normalization.
        wer_metric: object exposing ``compute(predictions=..., references=...)``.
        eval_name: label printed in the summary line.
        log_every: print a progress line every ``log_every`` samples. The
            default of 1 logs every sample; 0 or ``None`` disables progress
            logging.

    Returns:
        Tuple ``(predictions, references, wer_score)`` where the first two are
        lists of normalized strings in dataset order.
    """
    total = len(dataset_list)
    predictions = []
    references = []

    print(f"Evaluating on {total} samples...")

    for i, sample in enumerate(dataset_list):
        if log_every and i % log_every == 0:
            print(f"Processed {i}/{total} samples")

        audio = sample["audio"]["array"]
        ground_truth = sample["text"]

        result = pipe(audio, return_timestamps=True)
        prediction = result["text"]

        predictions.append(normalize(processor, prediction))
        references.append(normalize(processor, ground_truth))

    wer_score = wer_metric.compute(predictions=predictions, references=references)
    print(f'***********{eval_name}\n prediction: {predictions} \n references: {references} \n wer: {wer_score}')
    return predictions, references, wer_score


TypeError: unsupported operand type(s) for |: 'type' and 'ABCMeta'


1. Instantiate a `torch` model from its source (the Hugging Face Hub in this case) and evaluate it to obtain the reference WER.

In [None]:
# Use the first CUDA device when one is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# float16 when CUDA is available, float32 otherwise.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
# Word-error-rate metric, loaded through `evaluate.load`.
wer_metric = load("wer")
model_id = "openai/whisper-medium"
# Load the pretrained speech-to-text model in the dtype chosen above.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model = model.to(device)
# The processor supplies both the tokenizer and the feature extractor used below.
processor = AutoProcessor.from_pretrained(model_id)
task = "automatic-speech-recognition"

# `pipe` is reused by every later cell; its `.model` attribute is what the
# following steps replace and reconfigure.
pipe = pipeline(
    task=task,
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device
)

# Stream the "clean" validation split of librispeech_asr instead of downloading it in full.
# NOTE(review): trust_remote_code=True presumably lets the dataset's loading
# script run locally — confirm this is acceptable for your environment.
dataset = load_dataset(
    "librispeech_asr", "clean", split="validation", streaming=True, trust_remote_code=True
)
# Keep only the first 2 samples and materialize them, so that every evaluation
# in this notebook runs on the same small, fixed list.
dataset = dataset.take(2)
dataset_list = list(dataset)
# Reference run on the unmodified model ('vanilla').
predictions_gt, references_gt, wer_gt = run_evaluation(pipe, dataset_list, processor, wer_metric, 'vanilla')

Device set to use cuda:0
Due to a bug fix in https://github.com/huggingface/transformers/pull/28687 transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English.This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`.


Evaluating on 2 samples...
Processed 0/2 samples
Processed 1/2 samples
***********vanilla
 prediction: ['he was in a fevered state of mind owing to the blight his wife is action threatened to cast upon his entire future', 'he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did'] 
 references: ['he was in a fevered state of mind owing to the blight his wife is action threatened to cast upon his entire future', 'he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did'] 
 wer: 0.0


2. Transform into `DmxModel`; this does not change the functional behavior.

In [None]:
# Replace the pipeline's model with the DmxModel built from it.
# NOTE(review): the new value is derived from the current pipe.model, so
# re-running this cell would convert an already-converted model — confirm that
# is harmless, or restart the kernel before re-running.
pipe.model = DmxModel.from_torch(pipe.model)

# -------------------------------------------------------------------------------
# Same evaluation as the 'vanilla' run, now on the converted model ('baseline').
predictions_baseline, references_baseline, wer_baseline = run_evaluation(pipe, dataset_list, processor, wer_metric, 'baseline')

Evaluating on 2 samples...
Processed 0/2 samples


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.43.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Processed 1/2 samples
***********baseline
 prediction: ['he was in a fevered state of mind owing to the blight his wife is action threatened to cast upon his entire future', 'he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did'] 
 references: ['he was in a fevered state of mind owing to the blight his wife is action threatened to cast upon his entire future', 'he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did'] 
 wer: 0.0


3. Configure to BASIC mode; this should bring in all VSIMD approximations with default config.

In [None]:
# Switch the DmxModel held by the pipeline to BASIC mode (called for its effect
# on pipe.model; the return value is not used).
pipe.model.to_basic_mode()

# -------------------------------------------------------------------------------
# Same evaluation again, on the BASIC-mode model ('basic').
predictions_basic, references_basic, wer_basic = run_evaluation(pipe, dataset_list, processor, wer_metric, 'basic')

Evaluating on 2 samples...
Processed 0/2 samples
Processed 1/2 samples
***********basic
 prediction: ['he was in a fevered state of mind owing to the blight his wife is action threatened to cast upon his entire future', 'he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did'] 
 references: ['he was in a fevered state of mind owing to the blight his wife is action threatened to cast upon his entire future', 'he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did'] 
 wer: 0.0


4. Calibrate the `LayerNorm` instances with SLaNC.

In [None]:
from dmx.compressor import nn
from dmx.compressor.modeling import DmxModule
import re
from dmx.compressor.advanced_recipe import (
    DmxSLaNCHyperparams,
    DmxSLaNCRecipe,
)


def hp_gen(_model) -> dict:
    """Build the SLaNC hyperparameters for the DMX ``LayerNorm`` modules of ``_model``.

    Every DMX ``LayerNorm`` found in the first graph module of ``_model._gms``
    is classified by its qualified name, and the neighbouring modules it is
    calibrated against are looked up on ``_model`` itself:

    * name contains ``.layer_norm`` (the final layer norm): ``post_mlp``, using
      ``final_layer_norm`` / ``fc1`` / ``fc2`` of the last child of the
      sibling ``.layers`` container;
    * ``self_attn_layer_norm`` whose name has no ``.0.`` segment: ``post_mlp``,
      using ``final_layer_norm`` / ``fc1`` / ``fc2`` of the previous layer;
    * ``encoder_attn_layer_norm``: ``post_attn``;
    * ``final_layer_norm`` under ``.encoder.`` or ``.decoder.``: ``post_attn``;
    * ``self_attn_layer_norm`` whose name has a ``.0.`` segment: ``first``.

    LayerNorms matching none of these cases get no entry.

    Args:
        _model: model exposing ``_gms`` (a mapping whose first value is the
            graph module to walk) and ``get_submodule``.

    Returns:
        dict mapping each selected LayerNorm module to its
        ``DmxSLaNCHyperparams``.
    """

    def _lookup(name: str, old: str, new: str):
        """Return the submodule of ``_model`` named ``name`` with ``old`` replaced by ``new``."""
        # Resolve on the model passed in, not on a notebook global, so the
        # generator works for whichever model the recipe is applied to.
        return _model.get_submodule(name.replace(old, new))

    _hp = {}
    complete_gm = list(_model._gms.values())[0]
    named_dmx_modules = [
        (n, m) for (n, m) in complete_gm.named_modules() if isinstance(m, DmxModule)
    ]

    for _n, _m in named_dmx_modules:
        if not isinstance(_m, nn.LayerNorm):
            continue

        if ".layer_norm" in _n:
            # final layer norm
            last_layer = list(_lookup(_n, ".layer_norm", ".layers").children())[-1]
            _hp[_m] = DmxSLaNCHyperparams(
                position="post_mlp",
                mlp_type="standard",
                device=_m.weight.device,
                prev_ln_weight=last_layer.final_layer_norm,
                fc1=last_layer.fc1,
                fc2=last_layer.fc2,
            )
        elif "self_attn_layer_norm" in _n and ".0." not in _n:
            # The layer index is the numeric path segment of the name; this
            # norm is calibrated against the MLP of the layer just before it.
            layer_num = int("".join(re.findall(r"\.\d+\.", _n)).replace(".", ""))
            prev_layer_name = _n.replace(f".{layer_num}.", f".{layer_num - 1}.")
            _hp[_m] = DmxSLaNCHyperparams(
                position="post_mlp",
                mlp_type="standard",
                device=_m.weight.device,
                prev_ln_weight=_lookup(
                    prev_layer_name, "self_attn_layer_norm", "final_layer_norm"
                ),
                fc1=_lookup(prev_layer_name, "self_attn_layer_norm", "fc1"),
                fc2=_lookup(prev_layer_name, "self_attn_layer_norm", "fc2"),
            )
        elif "encoder_attn_layer_norm" in _n:
            # NOTE(review): v_proj is taken from self_attn but o_proj from
            # encoder_attn — confirm this pairing is intended (self_attn.out_proj
            # may have been meant).
            _hp[_m] = DmxSLaNCHyperparams(
                position="post_attn",
                device=_m.weight.device,
                prev_ln_weight=_lookup(
                    _n, "encoder_attn_layer_norm", "self_attn_layer_norm"
                ),
                v_proj=_lookup(_n, "encoder_attn_layer_norm", "self_attn.v_proj"),
                o_proj=_lookup(_n, "encoder_attn_layer_norm", "encoder_attn.out_proj"),
            )
        elif "final_layer_norm" in _n:
            if ".encoder." in _n:
                prev_ln_weight = _lookup(_n, "final_layer_norm", "self_attn_layer_norm")
                v_proj = _lookup(_n, "final_layer_norm", "self_attn.v_proj")
                o_proj = _lookup(_n, "final_layer_norm", "self_attn.out_proj")
            elif ".decoder." in _n:
                prev_ln_weight = _lookup(
                    _n, "final_layer_norm", "encoder_attn_layer_norm"
                )
                # NOTE(review): v_proj comes from the *encoder* layer with the
                # same index (self_attn.v_proj), not from this decoder layer's
                # encoder_attn — confirm this is intended.
                v_proj = _lookup(
                    _n.replace(".decoder.", ".encoder."),
                    "final_layer_norm",
                    "self_attn.v_proj",
                )
                o_proj = _lookup(_n, "final_layer_norm", "encoder_attn.out_proj")
            else:
                # Neither encoder nor decoder: skip rather than silently reuse
                # the projections left over from a previous iteration.
                continue
            _hp[_m] = DmxSLaNCHyperparams(
                position="post_attn",
                device=_m.weight.device,
                prev_ln_weight=prev_ln_weight,
                v_proj=v_proj,
                o_proj=o_proj,
            )
        elif "self_attn_layer_norm" in _n and ".0." in _n:
            # First norm of a stack: no preceding modules to calibrate against.
            _hp[_m] = DmxSLaNCHyperparams(
                position="first",
                device=_m.weight.device,
            )
    return _hp

# Apply the SLaNC recipe, driven by hp_gen, to the pipeline's model. The object
# returned by applied_to() is used as a context manager; the body only reports
# completion.
with DmxSLaNCRecipe(hp_gen).applied_to(pipe.model):
    print("SLaNC done!")

from dmx.compressor.modeling.model import DmxConfig
complete_gm = list(pipe.model._gms.values())[0]

# Collect the current dmx_config() of every DMX module in the graph module,
# keyed by its name prefixed with '_gm.', and apply that configuration to the model.
# NOTE(review): the recorded output of the next cell shows empty extra_params
# for every LayerNorm — confirm that this configure() call keeps the values
# tuned by SLaNC.
all_modules_config  = DmxConfig({'_gm.'+n:m.dmx_config() for n,m in complete_gm.named_modules() if isinstance(m,DmxModule)})
pipe.model.configure(all_modules_config)
 
# Same evaluation as before, on the SLaNC-calibrated BASIC-mode model ('basic_slanc').
predictions_slanc,references_slanc,wer_slanc = run_evaluation(pipe,dataset_list,processor,wer_metric,'basic_slanc')

SLaNC done!
Name: model.encoder.layers.0.self_attn_layer_norm, norm: {'norm': 1.0}
Name: model.encoder.layers.0.final_layer_norm, norm: {'norm': tensor(0.0303, device='cuda:0')}
Name: model.encoder.layers.1.self_attn_layer_norm, norm: {'norm': tensor(0.0300, device='cuda:0')}
Name: model.encoder.layers.1.final_layer_norm, norm: {'norm': tensor(0.0395, device='cuda:0')}
Name: model.encoder.layers.2.self_attn_layer_norm, norm: {'norm': tensor(0.0313, device='cuda:0')}
Name: model.encoder.layers.2.final_layer_norm, norm: {'norm': tensor(0.0396, device='cuda:0')}
Name: model.encoder.layers.3.self_attn_layer_norm, norm: {'norm': tensor(0.0302, device='cuda:0')}
Name: model.encoder.layers.3.final_layer_norm, norm: {'norm': tensor(0.0414, device='cuda:0')}
Name: model.encoder.layers.4.self_attn_layer_norm, norm: {'norm': tensor(0.0315, device='cuda:0')}
Name: model.encoder.layers.4.final_layer_norm, norm: {'norm': tensor(0.0348, device='cuda:0')}
Name: model.encoder.layers.5.self_attn_layer_n

In [None]:
# Report the extra parameters currently held by each DMX LayerNorm's
# approximator function.
complete_gm = list(pipe.model._gms.values())[0]
named_dmx_modules = [
    (n, m) for (n, m) in complete_gm.named_modules() if isinstance(m, DmxModule)
]

for _n, _m in named_dmx_modules:
    # Only LayerNorm modules are of interest; skip every other DMX module.
    if not isinstance(_m, nn.LayerNorm):
        continue
    print(f"Name: {_n}, norm: {_m.approximator.function.extra_params}")

Name: model.encoder.layers.0.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.0.final_layer_norm, norm: {}
Name: model.encoder.layers.1.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.1.final_layer_norm, norm: {}
Name: model.encoder.layers.2.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.2.final_layer_norm, norm: {}
Name: model.encoder.layers.3.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.3.final_layer_norm, norm: {}
Name: model.encoder.layers.4.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.4.final_layer_norm, norm: {}
Name: model.encoder.layers.5.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.5.final_layer_norm, norm: {}
Name: model.encoder.layers.6.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.6.final_layer_norm, norm: {}
Name: model.encoder.layers.7.self_attn_layer_norm, norm: {}
Name: model.encoder.layers.7.final_layer_norm, norm: {}
Name: model.encoder.layers.8.self_attn_layer_norm, norm: {}
Name: model.