In [1]:
from transformers import pipeline

# Task and checkpoint for this demo. "openai/clip-vit-base-patch32" is used for
# faster dev iterations; "openai/clip-vit-large-patch14" is the larger
# alternative checkpoint.
task = "zero-shot-image-classification"
model = "openai/clip-vit-base-patch32"

# Keyword arguments for one pipeline call: a COCO val2017 image URL and the
# candidate text labels it is scored against. Reused by the later cells.
task_case = {
    "images": "http://images.cocodataset.org/val2017/000000039769.jpg",
    "candidate_labels": [
        "a photo of cats",
        "a photo of dogs",
    ],
}

# device_map="auto" leaves device placement to the library
# (the recorded run reports "Device set to use cuda:0").
pipe = pipeline(task=task, model=model, device_map="auto")

# Baseline: inference with the raw model exactly as supplied by the customer.
print(pipe(**task_case))

  from .autonotebook import tqdm as notebook_tqdm
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.48, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


[{'score': 0.9988459348678589, 'label': 'a photo of cats'}, {'score': 0.0011540568666532636, 'label': 'a photo of dogs'}]


In [2]:
from dmx.compressor import DmxModel

# Replace the pipeline's model with the result of DmxModel.from_torch, so every
# later pipe(...) call in this notebook runs through the converted model.
# NOTE(review): re-running this cell passes the already-converted model back
# into from_torch — confirm that is safe, or restart the kernel before re-running.
pipe.model = DmxModel.from_torch(pipe.model)
# Put the converted model into BASIC mode before running inference.
pipe.model.to_basic_mode()

print(pipe(**task_case))  # the same inference run as before, now on the BASIC-mode ML reference of the model on Corsair

If this is not desired, please set os.environ['TORCH_CUDA_ARCH_LIST'].


[{'score': 0.9989001750946045, 'label': 'a photo of cats'}, {'score': 0.0010998033685609698, 'label': 'a photo of dogs'}]


In [3]:
# Both monitored submodules sit in the first text-encoder layer, so their
# dotted names are built from the shared prefix.
submodules_to_monitor = [
    f"text_model.encoder.layers.0.{leaf}"
    for leaf in ("layer_norm1", "mlp.activation_fn")
]

# Run the same inference while monitoring is active for the listed submodules.
with pipe.model.monitoring(submodules_to_monitor):
    print(pipe(**task_case))

# Fetch the monitoring records for those submodules once the run has finished.
records = pipe.model.get_monitoring_records(submodules_to_monitor)

[{'score': 0.9989001750946045, 'label': 'a photo of cats'}, {'score': 0.0010998033685609698, 'label': 'a photo of dogs'}]
