In [1]:
import torch
import numpy as np
import torch.nn as nn
from transformers import Blip2Processor, Blip2ForConditionalGeneration, AutoProcessor, Blip2ForImageTextRetrieval
from datasets import COCODataset
from tqdm import tqdm
from PIL import Image
from torch.utils.data import DataLoader, RandomSampler
# from utils import print_model_structure

from collections import defaultdict
from functools import partial

import gc
import inspect
import random

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
model_name = "Salesforce/blip2-opt-2.7b"

model = Blip2ForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)

processor = Blip2Processor.from_pretrained(model_name)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [4]:
model

Blip2ForConditionalGeneration(
  (vision_model): Blip2VisionModel(
    (embeddings): Blip2VisionEmbeddings(
      (patch_embedding): Conv2d(3, 1408, kernel_size=(14, 14), stride=(14, 14))
    )
    (encoder): Blip2Encoder(
      (layers): ModuleList(
        (0-38): 39 x Blip2EncoderLayer(
          (self_attn): Blip2Attention(
            (dropout): Dropout(p=0.0, inplace=False)
            (qkv): Linear(in_features=1408, out_features=4224, bias=True)
            (projection): Linear(in_features=1408, out_features=1408, bias=True)
          )
          (layer_norm1): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
          (mlp): Blip2MLP(
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=1408, out_features=6144, bias=True)
            (fc2): Linear(in_features=6144, out_features=1408, bias=True)
          )
          (layer_norm2): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
        )
      )
    )
    (post_layernorm): LayerNorm((

In [5]:

coco_dataset = COCODataset(ann_file='/nfshomes/vla/project_dirs/low-bit-vision/datasets/cocow/annotations/captions_val2017.json',
                           img_dir='/nfshomes/vla/project_dirs/low-bit-vision/datasets/cocow/images/val2017')

# calibration_set = [coco_dataset[0], coco_dataset[1]]

loading annotations into memory...
Done (t=0.03s)
creating index...
index created!


In [6]:

len(coco_dataset)

5000

In [7]:
# utils

@torch.no_grad()
def compute_loss(fp_output, q_output):
    fp_output_flat = fp_output.view(-1)
    q_output_flat = q_output.view(-1)
    L2 = torch.linalg.norm(fp_output_flat - q_output_flat, ord=2)
    return L2


# returns all nn.linear within module (a layer)
def get_named_linears(module):
    return {name: mod for name, mod in module.named_modules() if isinstance(mod, nn.Linear)}


def sanitize_kwargs(inputs_kwargs, module):
    """
    Remove the arguments that are not supported in the module's
    forward pass

    Args:
        inputs_kwargs (`dict`):
            The input dictionary to pass to the model layer
        module (`torch.nn.Module`):
            Target module to quantize.
    """
    module_signature = inspect.signature(module.forward).parameters
    sanitized_kwargs = {}
    for k, v in inputs_kwargs.items():
        if k in module_signature:
            sanitized_kwargs[k] = v
    return sanitized_kwargs


In [8]:
# base class for AWQ quantizer
class BaseAWQQuantizer():
    
    def __init__(self, model, device, inputs_processor, dataset, **kwargs):
        self.model = model
        self.device = device
        self.inputs_processor = inputs_processor
        self.dataset = dataset

        # QUANTIZATION SETTINGS
        self.w_bits = 4
        self.group_size = 128
        self.grid_search_size = 20
        self.zero_point = True

        # TODO: change to something appropriate
        # AWQ uses 128 for LLMs
        self.n_samples = 2
        
        self.run_model = None


    
    def pseudo_quantize_tensor(self, w: torch.Tensor):
        org_w_shape = w.shape
        if self.group_size > 0:
            assert org_w_shape[-1] % self.group_size == 0
            w = w.reshape(-1, self.group_size)
        assert w.dim() == 2
        assert torch.isnan(w).sum() == 0

        # zero point quantization
        if self.zero_point:
            max_val = w.amax(dim=1, keepdim=True)
            min_val = w.amin(dim=1, keepdim=True)
            max_int = 2**self.w_bits - 1
            min_int = 0
            scales = (max_val - min_val).clamp(min=1e-5) / max_int
            zeros = (-torch.round(min_val / scales)).clamp_(min_int, max_int)
            w = (
                torch.clamp(torch.round(w / scales) + zeros, min_int, max_int) - zeros
            ) * scales
            zeros = zeros.view(org_w_shape[0], -1)
        else:
            max_val = w.abs().amax(dim=1, keepdim=True)
            max_val = max_val.clamp(min=1e-5)
            max_int = 2 ** (self.w_bits- 1) - 1
            min_int = -(2 ** (self.w_bits - 1))
            scales = max_val / max_int
            zeros = None
            w = torch.clamp(torch.round(w / scales), min_int, max_int) * scales

        assert torch.isnan(scales).sum() == 0
        assert torch.isnan(w).sum() == 0

        scales = scales.view(org_w_shape[0], -1)
        w = w.reshape(org_w_shape)

        return w, scales, zeros


    @torch.no_grad
    def quantize(self):

        layer_groups = self._get_model_layer_groups()
        calibration_set = self._get_calibration_set()
        first_inputs, self.layer_args, self.layer_kwargs = self._gather_first_inputs(layer_groups, calibration_set)


        for layer_group, modules in layer_groups.items():

            self.inps = first_inputs[layer_group]
            for i in tqdm(range(len(modules)), desc= f"Quantizing {layer_group}"):
                
                if layer_group != 'qformer_layers':

                    layer = modules[i]

                    # nn.linear modules within layer to quantize
                    named_linears = get_named_linears(layer)
                    linear_inputs = self._gather_linear_inputs(layer, named_linears, layer_group)

                    # for k,v in linear_inputs.items():
                    #     print(f'{k}:{v.shape}')

                    
                    grouped_mods = self._group_modules_for_scaling(layer, linear_inputs, layer_group)

                    scales = [
                        self._compute_scales(layer, **group)
                        for group in grouped_mods    
                    ]

                    # print(scales)
                    # print(torch.unique(scales[0]))
                    # print(torch.unique(scales[1]))
                    # print(torch.unique(scales[2]))

                    # apply scales to prev_op and modules
                    for group, scale in zip(grouped_mods, scales):
                        
                        print('-'*80)
                        print(group['prev_op'].weight)
                        print('-'*80)
                        self._apply_scales(scale, group['prev_op'], group['modules'])
                        print('-'*80)
                        print(group['prev_op'].weight)
                        print('-'*80)            


                    # TODO: solve for and apply clipping

                # TODO:remove
                break
            
            # TODO:remove
            # break

            
        return layer_groups, first_inputs, self.layer_args, self.layer_kwargs, linear_inputs, scales
  

    def _gather_first_inputs(self, layer_groups, calibration_set):

        first_inputs = {}
        layer_args = {}
        layer_kwargs = {}

        # get input and kwargs to layer 0
        # use this Catcher hack cause forward hooks cannot capture kwargs
        class Catcher(nn.Module):
            def __init__(self, module, layer_group, is_last):
                super().__init__()
                self.module = module
                self.layer_group = layer_group
                self.is_last = is_last

            def forward(self, *args, **kwargs):
                # assume first input to forward is hidden states
                if len(args) > 0:
                    hidden_states = args[0]
                    # del args
                else:
                    first_key = list(kwargs.keys())[0]
                    hidden_states = kwargs.pop(first_key)

                first_inputs[self.layer_group] = hidden_states

                # preserve rest of positional arguments
                layer_args[self.layer_group] = args[1:]
                layer_kwargs[self.layer_group] = kwargs
                
                # early exit for last group of layers
                if self.is_last:
                    raise ValueError

                return self.module.forward(*args, **kwargs)

        keys = list(layer_groups.keys())

        for i in range(len(keys)):
            layer_group = keys[i]
            is_last = True if i == len(keys) - 1 else False

            modules = layer_groups[layer_group]
            modules[0] = Catcher(modules[0], layer_group, is_last)

        try:
            self.run_model(calibration_set)
        except ValueError:
            pass
        
        for _, modules in layer_groups.items():
            # restore proper module at beginning of layer group
            modules[0] = modules[0].module
        
        return first_inputs, layer_args, layer_kwargs
       

    def _gather_linear_inputs(self, layer, named_linears, layer_group):

        def input_hook(module, input, output, module_name, inputs):
            x = input[0]
            x = x.detach().cpu()

            # out = output[0]
            # out = out.detach().cpu()

            inputs[module_name].append(x)
        

        inputs = defaultdict(list)
        hooks = []
        
        for name, mod in named_linears.items():
            hooks.append(
                mod.register_forward_hook(partial(input_hook,
                                                  module_name = name, 
                                                  inputs = inputs))
            )

        # compute next set of inputs, grabbing linear inputs through the hooks
        self.inps = layer(self.inps, *self.layer_args[layer_group], **self.layer_kwargs[layer_group])
        self.inps = self.inps[0]

        # remove hooks from model
        for hook in hooks:
            hook.remove()

        inputs = {k: torch.cat(v, dim=0) for k, v in inputs.items()}

        return inputs
    
    def _compute_scales(self, layer, prev_op, modules, inp, parent_module, layer_args, layer_kwargs):

        def clear_memory(weight=None):
            if weight is not None:
                del weight
            gc.collect()
            torch.cuda.empty_cache()
    
        inp = inp.to(device)

        # block of weights concatted together
        W = torch.cat([mod.weight for mod in modules], dim = 0)
        orig_shape = W.shape
        W = W.view(-1, self.group_size)

        # rescale W to 0-1 scale
        W_scale = W.abs() / (W.abs().amax(dim=1, keepdim=True) + 1e-6)
        W_scale = W_scale.view(orig_shape)
        # per channel mean of normalized weights
        W_mean = W_scale.mean(0)
        W_mean = W_mean.view(-1)

        clear_memory(W)

        # per channel mean of input (activation)
        X_mean = inp.abs().view(-1, inp.shape[-1]).mean(0)
        X_mean = X_mean.view(-1)

        kwargs = sanitize_kwargs(layer_kwargs, parent_module)

        # compute full precision output
        with torch.no_grad():
            # fp_output = parent_module(inp, *layer_args, **kwargs)[0]
            fp_output = parent_module(inp, **kwargs)[0]
            # print(fp_output)

        
        # Grid search for best scales
        n_grid = self.grid_search_size
        history = []
        best_ratio = -1
        best_scales = None
        best_error = float("inf")

        org_sd = {k: v.cpu() for k, v in parent_module.state_dict().items()}

        for ratio in range(n_grid):
            scales = X_mean.pow(ratio).clamp(min=1e-4).view(-1)

            # avoid scaling values that overflow
            scales[torch.isinf(scales)] = 1
            scales[torch.isnan(scales)] = 1

            scales_view = scales.view(1, -1).to(device)
            # print(scales_view)

            # Q(W * s)
            # NOTE: only nn.linear modules for now

            # pseudo-quantize modules
            for mod in modules:
                mod.weight.mul_(scales_view)
                mod.weight.data = (
                    self.pseudo_quantize_tensor(mod.weight.data)[0] / scales_view
                )

            with torch.no_grad():
                # Q(W * s) * X
                # q_output = parent_module(inp, *layer_args, **layer_kwargs)[0]
                # q_output = parent_module(inp, *layer_args, **layer_kwargs)[0]
                q_output = parent_module(inp, **kwargs)[0]
            
            # Compute loss (L2 NORM)
            loss = compute_loss(fp_output, q_output)

            history.append(loss)
            if loss < best_error:
                best_error = loss
                best_ratio = ratio
                best_scales = scales.clone()

            # reset to original weights
            parent_module.load_state_dict(org_sd)

        assert best_ratio != -1, "best scales ratio never set"
        assert torch.isnan(best_scales).sum() == 0, best_scales

        return best_scales.detach().cpu()
    

    def _apply_scales(self, scale, prev_op, modules):

        scale = scale.to(device)
        prev_op = prev_op.to(device)

        if isinstance(prev_op, torch.nn.LayerNorm):
            prev_op.weight.div_(scale)

            if hasattr(prev_op, "bias") and prev_op.bias is not None:
                prev_op.bias.div_(scale)

        elif isinstance(prev_op, torch.nn.Linear):
            prev_op.weight[-scale.size(0) :].div_(scale.view(-1, 1))

        for fc in modules:
            fc.weight.mul_(scale.view(1, -1))

        # SANITY checks
        for p in prev_op.parameters():
            assert torch.isnan(p).sum() == 0
        for fc in modules:
            for p in fc.parameters():
                assert torch.isnan(p).sum() == 0

        prev_op.cpu()
        for fc in modules:
            fc.cpu()
        scale.cpu()



    # return layers of model to consider for quantization (modify with config file)
    def _get_model_layer_groups(self):
        raise NotImplementedError('_get_model_layers')
    
    def _get_calibration_set(self):
        raise NotImplementedError('_get_calibration_set')

    def _prepare_input(self):
        raise NotImplementedError('_prepare_input')
    
    def _group_modules_for_scaling(self, layer, linear_inputs, layer_group):
        raise NotImplementedError('_group_modules_for_scaling')
    

class Blip2ForConditionalGenerationAWQQuantizer(BaseAWQQuantizer):

    def __init__(self, model, inputs_processor, dataset):
        assert isinstance(model, Blip2ForConditionalGeneration)

        super().__init__(model, device, inputs_processor, dataset)
        self.run_model = model.generate
        
    def _get_model_layer_groups(self):
        # NOTE: returning all layers for now
        # NOTE: should ensure that keys are defined sequentially for early quitting of calibration set run
        return {'vit_layers': self.model.vision_model.encoder.layers,
                'qformer_layers': self.model.qformer.encoder.layer,
                'llm_layers': self.model.language_model.model.decoder.layers
               }

    def _get_calibration_set(self):
        # NOTE: small set for testing for now

        samples = []
        # n = 0

        random.seed(0)
        indices = random.sample(range(len(self.dataset)), self.n_samples)
        
        for i in indices:
            
            data = self.dataset[i]

            sample = self._prepare_input(data[0])
            samples.append(sample)
            
            # n += 1
            # if n == self.n_samples:
            #     break
        
        # NOTE: might have to break this up into batches, check gpu size
        samples = torch.cat(samples, dim = 0)
        return samples

    def _prepare_input(self, inp):
        X = self.inputs_processor(images=inp, return_tensors="pt").to(device)
        return X['pixel_values']
    
    def _group_modules_for_scaling(self, layer, linear_inputs, layer_group):
        grouped_mods = []

        if layer_group == 'vit_layers':
            
            grouped_mods.append(
                # vit self-attn
                dict(
                    prev_op = layer.layer_norm1,
                    modules = [layer.self_attn.qkv],
                    inp = linear_inputs['self_attn.qkv'],
                    parent_module = layer.self_attn,
                    layer_args = self.layer_args[layer_group],
                    layer_kwargs = self.layer_kwargs[layer_group]
                )
            )

            grouped_mods.append(
                # vit fc1
                dict(
                    prev_op = layer.layer_norm2,
                    modules = [layer.mlp.fc1],
                    inp = linear_inputs['mlp.fc1'],
                    parent_module = layer.mlp.fc1,
                    layer_args = self.layer_args[layer_group],
                    layer_kwargs = self.layer_kwargs[layer_group]
                )
            )

            grouped_mods.append(
                 # vit fc2
                dict(
                    prev_op = layer.mlp.fc1,
                    modules = [layer.mlp.fc2],
                    inp = linear_inputs['mlp.fc2'],
                    parent_module = layer.mlp.fc2,
                    layer_args = self.layer_args[layer_group],
                    layer_kwargs = self.layer_kwargs[layer_group]
                )
            )

        elif layer_group == 'qformer_layers':
            # TODO:
            pass
        elif layer_group == 'llm_layers':
            
            assert layer.do_layer_norm_before, "llm do_layer_norm_before set to false"

            # llm attn
            grouped_mods.append(
                dict(
                    prev_op = layer.self_attn_layer_norm,
                    modules = [
                        layer.self_attn.q_proj,
                        layer.self_attn.k_proj,
                        layer.self_attn.v_proj,
                    ],
                    inp = linear_inputs['self_attn.q_proj'],
                    parent_module = layer.self_attn,
                    layer_args = self.layer_args[layer_group],
                    layer_kwargs = self.layer_kwargs[layer_group]
                )
            )

            # llm attn output
            grouped_mods.append(
                dict(
                    prev_op = layer.self_attn.v_proj,
                    modules = [layer.self_attn.out_proj],
                    inp = linear_inputs['self_attn.out_proj'],
                    parent_module = layer.self_attn.out_proj,
                    layer_args = self.layer_args[layer_group],
                    layer_kwargs = self.layer_kwargs[layer_group]

                )
            )

            # LLM FC1
            grouped_mods.append(
                dict(
                    prev_op = layer.final_layer_norm,
                    modules = [layer.fc1],
                    inp = linear_inputs['fc1'],
                    parent_module = layer.fc1,
                    layer_args = self.layer_args[layer_group],
                    layer_kwargs = self.layer_kwargs[layer_group]
                )
            )

          # LLM FC2
            grouped_mods.append(
                dict(
                    prev_op = layer.fc1,
                    modules = [layer.fc2],
                    inp = linear_inputs['fc2'],
                    parent_module = layer.fc2,
                    layer_args = self.layer_args[layer_group],
                    layer_kwargs = self.layer_kwargs[layer_group]
                )
            )

        return grouped_mods
        

class Blip2ForImageTextRetrievalAWQQuantizer(BaseAWQQuantizer):

    def __init__(self, model, device, inputs_processor, dataset):
        assert isinstance(model, Blip2ForImageTextRetrieval)
        super().__init__(model, device, inputs_processor, dataset)
        self.run_model = model.forward
        
    def _get_model_layer_groups(self):
        # NOTE: returning all layers for now
        return [*[layer for layer in self.model.vision_model.encoder.layers],
                *[layer for layer in self.model.qformer.encoder.layer]]

    def _get_calibration_set(self):
        return [self.dataset[0], self.dataset[1]]

    def _prepare_input(self, batch):
        X = self.processor(images=batch[0], text=batch[1][0], return_tensors="pt").to(device, torch.float16)
        return X


In [9]:
b = Blip2ForConditionalGenerationAWQQuantizer(model, processor, coco_dataset)


In [10]:
layers, first_inputs, layer_args, layer_kwargs, linear_inputs, scales = b.quantize()

Expanding inputs for image tokens in BLIP-2 should be done in processing. Please follow instruction here (https://gist.github.com/zucchini-nlp/e9f20b054fa322f84ac9311d9ab67042) to update your BLIP-2 model. Using processors without these attributes in the config is deprecated and will throw an error in v4.47.
Quantizing vit_layers:   3%|▎         | 1/39 [00:00<00:27,  1.38it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([ 4.6849e-05,  1.7414e-03,  3.2692e-03,  ..., -6.4087e-04,
        -6.9559e-05,  2.2221e-03], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([ 4.6849e-05,  1.7414e-03,  3.2692e-03,  ..., -6.4087e-04,
        -6.9559e-05,  2.2221e-03], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([-5.0068e-06, -3.0923e-04, -3.2330e-04,  ...,  1.9634e-04,
         1.8036e-02,  1.0341e-04], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
---------------------------------------------------

Quantizing vit_layers:   5%|▌         | 2/39 [00:01<00:24,  1.51it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.5557e-05, 2.4988e-01, 3.2129e-01,  ..., 2.5757e-02, 2.1497e-01,
        1.3318e-01], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.5557e-05, 2.4988e-01, 3.2129e-01,  ..., 2.5757e-02, 2.1497e-01,
        1.3318e-01], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.0368, 0.1511, 0.1774,  ..., 0.0641, 0.1412, 0.1503], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter con

Quantizing vit_layers:   8%|▊         | 3/39 [00:01<00:23,  1.56it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([0.2023, 0.5015, 0.3770,  ..., 0.1025, 0.3689, 0.2905], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.2023, 0.5015, 0.3770,  ..., 0.1025, 0.3689, 0.2905],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.2830, 0.7329, 0.4973,  ..., 0.2173, 0.5293, 0.4739], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.2830, 0.7329, 0.4973,  ..., 0.2

Quantizing vit_layers:  10%|█         | 4/39 [00:02<00:22,  1.59it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([0.4053, 0.5308, 0.5830,  ..., 0.1854, 0.4678, 0.3311], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.4053, 0.5308, 0.5830,  ..., 0.1854, 0.4678, 0.3311],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.6836, 0.8633, 0.7520,  ..., 0.4539, 0.8457, 0.7241], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.6836, 0.8633, 0.7520,  ..., 0.4

Quantizing vit_layers:  13%|█▎        | 5/39 [00:03<00:21,  1.60it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([0.7363, 0.9302, 0.9043,  ..., 0.6382, 0.9717, 0.7847], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.7363, 0.9302, 0.9043,  ..., 0.6382, 0.9717, 0.7847],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.8530, 1.0820, 0.9204,  ..., 0.7344, 1.1123, 0.8569], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.8530, 1.0820, 0.9204,  ..., 0.7

Quantizing vit_layers:  15%|█▌        | 6/39 [00:03<00:20,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([0.7910, 0.9175, 0.8740,  ..., 0.7480, 1.0029, 0.8218], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.7910, 0.9175, 0.8740,  ..., 0.7480, 1.0029, 0.8218],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.0029, 1.1270, 0.9790,  ..., 1.0137, 1.2744, 1.0518], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.0029, 1.1270, 0.9790,  ..., 1.0

Quantizing vit_layers:  18%|█▊        | 7/39 [00:04<00:19,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([0.8438, 0.8105, 0.9648,  ..., 0.8735, 0.9800, 0.9302], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.8438, 0.8105, 0.9648,  ..., 0.8735, 0.9800, 0.9302],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.1553, 1.1094, 1.0967,  ..., 1.1133, 1.2480, 1.1338], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.1553, 1.1094, 1.0967,  ..., 1.1

Quantizing vit_layers:  21%|██        | 8/39 [00:05<00:19,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([0.9351, 0.8218, 0.9478,  ..., 0.9160, 0.8569, 0.8696], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.9351, 0.8218, 0.9478,  ..., 0.9160, 0.8569, 0.8696],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2627, 1.1846, 1.1582,  ..., 1.4414, 1.4189, 1.2627], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2627, 1.1846, 1.1582,  ..., 1.4

Quantizing vit_layers:  23%|██▎       | 9/39 [00:05<00:18,  1.60it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.0029, 0.8926, 1.0293,  ..., 1.3457, 0.9727, 1.2695], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.0029, 0.8926, 1.0293,  ..., 1.3457, 0.9727, 1.2695],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2793, 1.0059, 1.1270,  ..., 1.6709, 1.2090, 1.3252], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2793, 1.0059, 1.1270,  ..., 1.6

Quantizing vit_layers:  26%|██▌       | 10/39 [00:06<00:17,  1.61it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.1143, 0.7617, 0.9385,  ..., 1.4209, 0.8950, 1.2910], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.1143, 0.7617, 0.9385,  ..., 1.4209, 0.8950, 1.2910],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.4482, 1.0195, 1.1387,  ..., 1.8389, 1.3008, 1.5986], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.4482, 1.0195, 1.1387,  ..., 1.8

Quantizing vit_layers:  28%|██▊       | 11/39 [00:06<00:17,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2764, 0.7598, 1.0361,  ..., 1.8555, 0.9600, 1.5459], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2764, 0.7598, 1.0361,  ..., 1.8555, 0.9600, 1.5459],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.3926, 0.9316, 1.1172,  ..., 1.9707, 1.3154, 1.4785], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.3926, 0.9316, 1.1172,  ..., 1.9

Quantizing vit_layers:  31%|███       | 12/39 [00:07<00:16,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.3535, 0.8799, 1.0166,  ..., 2.0977, 1.1143, 1.4521], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.3535, 0.8799, 1.0166,  ..., 2.0977, 1.1143, 1.4521],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6699, 1.1592, 1.2959,  ..., 2.2637, 1.5518, 1.7129], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6699, 1.1592, 1.2959,  ..., 2.2

Quantizing vit_layers:  33%|███▎      | 13/39 [00:08<00:16,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2666, 0.8726, 1.0020,  ..., 1.9834, 1.0781, 1.4443], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2666, 0.8726, 1.0020,  ..., 1.9834, 1.0781, 1.4443],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7783, 1.0928, 1.2715,  ..., 2.3828, 1.5420, 1.7100], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7783, 1.0928, 1.2715,  ..., 2.3

Quantizing vit_layers:  36%|███▌      | 14/39 [00:08<00:15,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.3975, 0.7373, 0.9219,  ..., 2.0234, 1.0352, 1.6377], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.3975, 0.7373, 0.9219,  ..., 2.0234, 1.0352, 1.6377],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7217, 0.9868, 1.2090,  ..., 2.0762, 1.3994, 1.6660], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7217, 0.9868, 1.2090,  ..., 2.0

Quantizing vit_layers:  38%|███▊      | 15/39 [00:09<00:14,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.4746, 0.6973, 0.8506,  ..., 1.9229, 0.9873, 1.4785], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.4746, 0.6973, 0.8506,  ..., 1.9229, 0.9873, 1.4785],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7783, 1.0713, 1.2461,  ..., 1.9502, 1.4648, 1.7236], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7783, 1.0713, 1.2461,  ..., 1.9

Quantizing vit_layers:  41%|████      | 16/39 [00:09<00:14,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.5010, 0.8818, 0.9702,  ..., 1.8691, 1.0664, 1.5361], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.5010, 0.8818, 0.9702,  ..., 1.8691, 1.0664, 1.5361],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6494, 1.1416, 1.2822,  ..., 1.7480, 1.3438, 1.6162], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6494, 1.1416, 1.2822,  ..., 1.7

Quantizing vit_layers:  44%|████▎     | 17/39 [00:10<00:13,  1.60it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6650, 0.9238, 1.0781,  ..., 1.9102, 1.2061, 1.7197], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6650, 0.9238, 1.0781,  ..., 1.9102, 1.2061, 1.7197],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7754, 1.2188, 1.3818,  ..., 1.7725, 1.4102, 1.6631], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7754, 1.2188, 1.3818,  ..., 1.7

Quantizing vit_layers:  46%|████▌     | 18/39 [00:11<00:13,  1.61it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6797, 0.9199, 1.0498,  ..., 1.8535, 1.1914, 1.6768], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6797, 0.9199, 1.0498,  ..., 1.8535, 1.1914, 1.6768],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7705, 1.2217, 1.4473,  ..., 1.8066, 1.4482, 1.7383], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7705, 1.2217, 1.4473,  ..., 1.8

Quantizing vit_layers:  49%|████▊     | 19/39 [00:11<00:12,  1.61it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7080, 0.9468, 1.1602,  ..., 1.8418, 1.2568, 1.6914], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7080, 0.9468, 1.1602,  ..., 1.8418, 1.2568, 1.6914],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8721, 1.2607, 1.4854,  ..., 1.8770, 1.5361, 1.7949], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8721, 1.2607, 1.4854,  ..., 1.8

Quantizing vit_layers:  51%|█████▏    | 20/39 [00:12<00:11,  1.61it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7861, 0.9355, 1.1172,  ..., 1.8232, 1.2773, 1.7148], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7861, 0.9355, 1.1172,  ..., 1.8232, 1.2773, 1.7148],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9766, 1.2500, 1.5244,  ..., 1.9385, 1.5332, 1.8926], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9766, 1.2500, 1.5244,  ..., 1.9

Quantizing vit_layers:  54%|█████▍    | 21/39 [00:13<00:11,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7822, 0.9243, 1.1494,  ..., 1.7393, 1.2021, 1.7422], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7822, 0.9243, 1.1494,  ..., 1.7393, 1.2021, 1.7422],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9375, 1.2637, 1.5420,  ..., 1.8652, 1.5527, 1.8975], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9375, 1.2637, 1.5420,  ..., 1.8

Quantizing vit_layers:  56%|█████▋    | 22/39 [00:13<00:10,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8662, 0.8926, 1.1367,  ..., 1.6475, 1.2441, 1.7842], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8662, 0.8926, 1.1367,  ..., 1.6475, 1.2441, 1.7842],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8496, 1.2324, 1.5000,  ..., 1.7188, 1.4844, 1.8535], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8496, 1.2324, 1.5000,  ..., 1.7

Quantizing vit_layers:  59%|█████▉    | 23/39 [00:14<00:09,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7754, 0.9443, 1.1924,  ..., 1.5674, 1.2344, 1.7373], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7754, 0.9443, 1.1924,  ..., 1.5674, 1.2344, 1.7373],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7432, 1.2451, 1.4707,  ..., 1.6377, 1.4805, 1.7520], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7432, 1.2451, 1.4707,  ..., 1.6

Quantizing vit_layers:  62%|██████▏   | 24/39 [00:14<00:09,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7842, 1.0498, 1.3057,  ..., 1.5342, 1.3164, 1.7266], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7842, 1.0498, 1.3057,  ..., 1.5342, 1.3164, 1.7266],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6826, 1.2725, 1.4619,  ..., 1.5605, 1.4629, 1.6758], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6826, 1.2725, 1.4619,  ..., 1.5

Quantizing vit_layers:  64%|██████▍   | 25/39 [00:15<00:08,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8438, 1.1846, 1.3662,  ..., 1.5273, 1.4238, 1.7598], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8438, 1.1846, 1.3662,  ..., 1.5273, 1.4238, 1.7598],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6611, 1.3350, 1.4795,  ..., 1.5176, 1.4863, 1.6250], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6611, 1.3350, 1.4795,  ..., 1.5

Quantizing vit_layers:  67%|██████▋   | 26/39 [00:16<00:07,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8057, 1.2822, 1.4287,  ..., 1.4775, 1.5098, 1.7334], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8057, 1.2822, 1.4287,  ..., 1.4775, 1.5098, 1.7334],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6211, 1.3916, 1.4902,  ..., 1.4600, 1.4961, 1.6113], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6211, 1.3916, 1.4902,  ..., 1.4

Quantizing vit_layers:  69%|██████▉   | 27/39 [00:16<00:07,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7822, 1.3672, 1.5049,  ..., 1.4678, 1.5254, 1.6943], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7822, 1.3672, 1.5049,  ..., 1.4678, 1.5254, 1.6943],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6270, 1.3975, 1.4922,  ..., 1.4443, 1.4922, 1.5811], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6270, 1.3975, 1.4922,  ..., 1.4

Quantizing vit_layers:  72%|███████▏  | 28/39 [00:17<00:06,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7627, 1.4639, 1.5479,  ..., 1.4834, 1.5967, 1.7236], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7627, 1.4639, 1.5479,  ..., 1.4834, 1.5967, 1.7236],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6533, 1.4443, 1.5342,  ..., 1.4531, 1.5273, 1.6377], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6533, 1.4443, 1.5342,  ..., 1.4

Quantizing vit_layers:  74%|███████▍  | 29/39 [00:17<00:06,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8320, 1.5430, 1.6514,  ..., 1.5967, 1.7002, 1.7842], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8320, 1.5430, 1.6514,  ..., 1.5967, 1.7002, 1.7842],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7285, 1.5381, 1.6201,  ..., 1.5107, 1.5908, 1.7266], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7285, 1.5381, 1.6201,  ..., 1.5

Quantizing vit_layers:  77%|███████▋  | 30/39 [00:18<00:05,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9463, 1.6738, 1.7568,  ..., 1.6797, 1.8027, 1.8984], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9463, 1.6738, 1.7568,  ..., 1.6797, 1.8027, 1.8984],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7852, 1.5898, 1.6934,  ..., 1.5918, 1.6719, 1.7930], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.7852, 1.5898, 1.6934,  ..., 1.5

Quantizing vit_layers:  79%|███████▉  | 31/39 [00:19<00:04,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9639, 1.7168, 1.7646,  ..., 1.6885, 1.8115, 1.9688], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9639, 1.7168, 1.7646,  ..., 1.6885, 1.8115, 1.9688],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8887, 1.6924, 1.7773,  ..., 1.6758, 1.7920, 1.9209], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.8887, 1.6924, 1.7773,  ..., 1.6

Quantizing vit_layers:  82%|████████▏ | 32/39 [00:19<00:04,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9648, 1.7510, 1.8271,  ..., 1.7285, 1.8711, 1.9766], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.9648, 1.7510, 1.8271,  ..., 1.7285, 1.8711, 1.9766],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.0176, 1.7812, 1.8965,  ..., 1.7734, 1.9199, 2.0352], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.0176, 1.7812, 1.8965,  ..., 1.7

Quantizing vit_layers:  85%|████████▍ | 33/39 [00:20<00:03,  1.62it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([2.0664, 1.8711, 1.9102,  ..., 1.8262, 1.9961, 2.1777], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.0664, 1.8711, 1.9102,  ..., 1.8262, 1.9961, 2.1777],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.1582, 1.8887, 2.0234,  ..., 1.8994, 2.0449, 2.1777], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.1582, 1.8887, 2.0234,  ..., 1.8

Quantizing vit_layers:  87%|████████▋ | 34/39 [00:21<00:03,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([2.0195, 1.8711, 1.9277,  ..., 1.7783, 1.9434, 2.1113], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.0195, 1.8711, 1.9277,  ..., 1.7783, 1.9434, 2.1113],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.2891, 2.0020, 2.1543,  ..., 2.0312, 2.2031, 2.3301], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.2891, 2.0020, 2.1543,  ..., 2.0

Quantizing vit_layers:  90%|████████▉ | 35/39 [00:21<00:02,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([2.1074, 1.9473, 2.0430,  ..., 1.8555, 2.0645, 2.1895], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.1074, 1.9473, 2.0430,  ..., 1.8555, 2.0645, 2.1895],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.4531, 2.1680, 2.2949,  ..., 2.1719, 2.3594, 2.5059], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.4531, 2.1680, 2.2949,  ..., 2.1

Quantizing vit_layers:  92%|█████████▏| 36/39 [00:22<00:01,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([2.1680, 2.0098, 2.0469,  ..., 1.9014, 2.0625, 2.2520], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.1680, 2.0098, 2.0469,  ..., 1.9014, 2.0625, 2.2520],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.6348, 2.3301, 2.4922,  ..., 2.3555, 2.5020, 2.6758], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.6348, 2.3301, 2.4922,  ..., 2.3

Quantizing vit_layers:  95%|█████████▍| 37/39 [00:22<00:01,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([2.2051, 2.0156, 2.1172,  ..., 1.9688, 2.1582, 2.3242], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.2051, 2.0156, 2.1172,  ..., 1.9688, 2.1582, 2.3242],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.7324, 2.4512, 2.6309,  ..., 2.4570, 2.6484, 2.7891], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.7324, 2.4512, 2.6309,  ..., 2.4

Quantizing vit_layers:  97%|█████████▋| 38/39 [00:23<00:00,  1.63it/s]

--------------------------------------------------------------------------------
Parameter containing:
tensor([2.3242, 2.0684, 2.2031,  ..., 1.9971, 2.2285, 2.3945], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.3242, 2.0684, 2.2031,  ..., 1.9971, 2.2285, 2.3945],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.8477, 2.5859, 2.7480,  ..., 2.5859, 2.7656, 2.9023], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.8477, 2.5859, 2.7480,  ..., 2.5

Quantizing vit_layers: 100%|██████████| 39/39 [00:24<00:00,  1.62it/s]


--------------------------------------------------------------------------------
Parameter containing:
tensor([2.2578, 1.9814, 2.1816,  ..., 1.9541, 2.1445, 2.3750], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.2578, 1.9814, 2.1816,  ..., 1.9541, 2.1445, 2.3750],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.9414, 2.5898, 2.8301,  ..., 2.6680, 2.7930, 2.9043], device='cuda:0',
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([2.9414, 2.5898, 2.8301,  ..., 2.6

Quantizing qformer_layers: 100%|██████████| 12/12 [00:00<00:00, 269153.20it/s]
Quantizing llm_layers:   3%|▎         | 1/32 [00:01<00:41,  1.35s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-3.0308e-03,  2.3232e-03,  5.0240e-03,  ..., -6.9122e-03,
         -6.1321e-04, -7.5836e-03],
        [-2.5296e-04,  5.7411e-03,  5.2261e-03,  ..., -6.1836e-03,
         -8.6441e-03, -1.7517e-02],
        [ 8.7051e-03, -3.1109e-03, -4.5891e-03,  ...,  6.0997e-03,
          2.8801e-03, -8.6689e-04],
        ...,
        [-1.1269e-02, -1.0815e-03,  9.8109e-05,  ...,  7.0190e-03,
     

Quantizing llm_layers:   6%|▋         | 2/32 [00:02<00:39,  1.33s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.5531, 2.3587, 2.3023,  ..., 1.0395, 1.4589, 1.9070],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0031,  0.0039,  0.0040,  ...,  0.0022, -0.0004, -0.0018],
        [-0.0015,  0.0006, -0.0007,  ...,  0.0084, -0.0024, -0.0009],
        [ 0.0024, -0.0026, -0.0025,  ..., -0.0020,  0.0046,  0.0023],
        ...,
        [-0.0033,  0.0005, -0.0013,  ...,  0.0002, -0.0036, -0.0062],
        [-0.0017, -0.0031, -0.0030,  ..., -0.0082,  0.0011,  0.0039],


Quantizing llm_layers:   9%|▉         | 3/32 [00:03<00:38,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.6651, 2.5628, 2.3958,  ..., 1.0957, 1.4827, 1.9393],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0030, -0.0072, -0.0025,  ..., -0.0106, -0.0175, -0.0120],
        [ 0.0074,  0.0003,  0.0004,  ..., -0.0037,  0.0135,  0.0008],
        [-0.0071,  0.0030, -0.0056,  ...,  0.0038, -0.0004,  0.0064],
        ...,
        [-0.0043, -0.0054, -0.0010,  ...,  0.0087, -0.0079, -0.0040],
        [ 0.0088,  0.0027, -0.0036,  ..., -0.0099,  0.0017,  0.0052],


Quantizing llm_layers:  12%|█▎        | 4/32 [00:05<00:36,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0096,  0.0021, -0.0016,  ...,  0.0232, -0.0084,  0.0242],
        [-0.0008,  0.0004,  0.0033,  ...,  0.0039, -0.0099, -0.0029],
        [-0.0155, -0.0035,  0.0152,  ..., -0.0014, -0.0223, -0.0371],
        ...,
        [ 0.0171,  0.0108,  0.0009,  ..., -0.0156, -0.0198,  0.0014],
        [ 0.0101, -0.0093,  0.0021,  ...,  0.0096,  0.0128, -0.0063],
        [ 0.0074,  0.0174,  0.0

Quantizing llm_layers:  16%|█▌        | 5/32 [00:06<00:35,  1.31s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0137,  0.0060, -0.0153,  ...,  0.0134,  0.0123, -0.0016],
        [ 0.0057,  0.0115,  0.0085,  ..., -0.0157, -0.0036, -0.0019],
        [-0.0052,  0.0098, -0.0029,  ...,  0.0135,  0.0013, -0.0163],
        ...,
        [-0.0169, -0.0063, -0.0109,  ..., -0.0050, -0.0020,  0.0017],
        [-0.0075,  0.0107,  0.0007,  ...,  0.0008,  0.0086, -0.0100],
        [-0.0068, -0.0078, -0.0

Quantizing llm_layers:  19%|█▉        | 6/32 [00:07<00:34,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0056, -0.0031,  0.0192,  ...,  0.0087, -0.0113,  0.0035],
        [ 0.0001,  0.0075,  0.0011,  ..., -0.0212,  0.0124, -0.0099],
        [-0.0023,  0.0012, -0.0030,  ...,  0.0103, -0.0054, -0.0036],
        ...,
        [ 0.0064,  0.0077,  0.0136,  ..., -0.0108, -0.0067, -0.0139],
        [ 0.0015, -0.0022,  0.0003,  ..., -0.0066,  0.0401, -0.0024],
        [-0.0072, -0.0097,  0.0

Quantizing llm_layers:  22%|██▏       | 7/32 [00:09<00:33,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0097, -0.0109, -0.0004,  ..., -0.0073, -0.0140, -0.0063],
        [-0.0045,  0.0181,  0.0045,  ...,  0.0066,  0.0133,  0.0036],
        [ 0.0002,  0.0138,  0.0088,  ...,  0.0120,  0.0082,  0.0012],
        ...,
        [ 0.0159, -0.0064, -0.0005,  ...,  0.0032,  0.0177, -0.0016],
        [-0.0185,  0.0029, -0.0110,  ..., -0.0180,  0.0211,  0.0188],
        [-0.0241,  0.0074, -0.0

Quantizing llm_layers:  25%|██▌       | 8/32 [00:10<00:31,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0101, -0.0160,  0.0317,  ..., -0.0038,  0.0110, -0.0153],
        [-0.0211,  0.0198,  0.0244,  ..., -0.0038, -0.0146, -0.0125],
        [-0.0107, -0.0069,  0.0201,  ..., -0.0005, -0.0081,  0.0013],
        ...,
        [-0.0110,  0.0049,  0.0053,  ..., -0.0216,  0.0007, -0.0052],
        [-0.0141, -0.0104, -0.0074,  ...,  0.0104,  0.0080,  0.0023],
        [-0.0050, -0.0013,  0.0

Quantizing llm_layers:  28%|██▊       | 9/32 [00:11<00:30,  1.33s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0021, -0.0092, -0.0005,  ..., -0.0228, -0.0086,  0.0055],
        [-0.0037,  0.0133,  0.0265,  ..., -0.0073,  0.0202,  0.0108],
        [ 0.0080,  0.0056, -0.0140,  ...,  0.0074, -0.0015, -0.0226],
        ...,
        [-0.0025,  0.0115,  0.0006,  ...,  0.0011,  0.0078,  0.0152],
        [ 0.0040,  0.0129,  0.0332,  ..., -0.0059, -0.0058, -0.0030],
        [-0.0020, -0.0018, -0.0

Quantizing llm_layers:  31%|███▏      | 10/32 [00:13<00:29,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0020, -0.0632,  0.0022,  ...,  0.0038, -0.0165, -0.0104],
        [ 0.0117, -0.0156,  0.0172,  ...,  0.0083,  0.0159, -0.0033],
        [-0.0181, -0.0168, -0.0136,  ...,  0.0091, -0.0180, -0.0028],
        ...,
        [ 0.0110,  0.0070, -0.0123,  ...,  0.0050, -0.0330,  0.0214],
        [-0.0182,  0.0176, -0.0330,  ...,  0.0164, -0.0018, -0.0017],
        [-0.0092,  0.0150,  0.0

Quantizing llm_layers:  34%|███▍      | 11/32 [00:14<00:27,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-1.0506e-02,  1.2993e-02, -2.1152e-03,  ...,  1.6678e-02,
         -4.3716e-03, -8.6670e-03],
        [-9.2545e-03, -1.8616e-02, -6.5346e-03,  ...,  1.1620e-02,
          1.7654e-02, -4.1771e-03],
        [ 2.2352e-05, -1.5076e-02,  3.7169e-04,  ..., -1.8005e-03,
          1.5305e-02,  1.0941e-02],
        ...,
        [-2.5921e-03,  8.2703e-03, -5.4970e-03,  ..., -1.6891e-02,
     

Quantizing llm_layers:  38%|███▊      | 12/32 [00:15<00:26,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0013, -0.0055,  0.0098,  ..., -0.0101,  0.0418,  0.0193],
        [ 0.0062,  0.0107, -0.0045,  ...,  0.0059, -0.0020, -0.0004],
        [ 0.0168,  0.0276,  0.0138,  ...,  0.0141, -0.0338,  0.0081],
        ...,
        [ 0.0180, -0.0295, -0.0067,  ...,  0.0175, -0.0003, -0.0119],
        [-0.0028,  0.0159, -0.0017,  ..., -0.0259, -0.0024,  0.0238],
        [ 0.0010,  0.0241,  0.0

Quantizing llm_layers:  41%|████      | 13/32 [00:17<00:24,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0045,  0.0173, -0.0139,  ...,  0.0014,  0.0005,  0.0115],
        [-0.0056, -0.0126,  0.0057,  ..., -0.0135, -0.0031, -0.0007],
        [-0.0050,  0.0185,  0.0123,  ..., -0.0052, -0.0104, -0.0258],
        ...,
        [-0.0204,  0.0095, -0.0028,  ...,  0.0039, -0.0137, -0.0004],
        [-0.0189, -0.0063, -0.0056,  ..., -0.0356, -0.0076, -0.0237],
        [ 0.0048,  0.0287, -0.0

Quantizing llm_layers:  44%|████▍     | 14/32 [00:18<00:23,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.4963, 2.4322, 2.1624,  ..., 1.2719, 1.2193, 1.8299],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0065, -0.0106,  0.0033,  ..., -0.0070,  0.0007,  0.0042],
        [-0.0082,  0.0022, -0.0005,  ...,  0.0131,  0.0031,  0.0013],
        [-0.0088, -0.0023,  0.0047,  ...,  0.0016,  0.0055,  0.0140],
        ...,
        [ 0.0029, -0.0049,  0.0053,  ...,  0.0004, -0.0124,  0.0018],
        [-0.0074, -0.0003,  0.0067,  ..., -0.0024,  0.0134,  0.0126],


Quantizing llm_layers:  47%|████▋     | 15/32 [00:19<00:22,  1.31s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0045,  0.0202, -0.0043,  ...,  0.0024,  0.0111,  0.0023],
        [-0.0248, -0.0071,  0.0169,  ...,  0.0012, -0.0144,  0.0056],
        [-0.0076,  0.0386,  0.0235,  ...,  0.0021, -0.0119, -0.0234],
        ...,
        [-0.0225, -0.0240,  0.0161,  ...,  0.0014,  0.0225, -0.0217],
        [-0.0008,  0.0053, -0.0281,  ..., -0.0046, -0.0210,  0.0234],
        [ 0.0011, -0.0319,  0.0

Quantizing llm_layers:  50%|█████     | 16/32 [00:21<00:21,  1.31s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.4067, 2.5606, 1.9257,  ..., 1.2771, 1.2110, 1.7681],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0024, -0.0015,  0.0034,  ...,  0.0060,  0.0050, -0.0035],
        [-0.0023, -0.0105,  0.0049,  ...,  0.0070,  0.0147, -0.0075],
        [ 0.0125, -0.0020, -0.0073,  ...,  0.0049, -0.0045, -0.0171],
        ...,
        [ 0.0093, -0.0073,  0.0016,  ..., -0.0150,  0.0081, -0.0045],
        [-0.0135,  0.0028, -0.0066,  ..., -0.0076,  0.0016,  0.0075],


Quantizing llm_layers:  53%|█████▎    | 17/32 [00:22<00:19,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0102, -0.0069, -0.0158,  ...,  0.0275,  0.0102,  0.0024],
        [-0.0024, -0.0046, -0.0216,  ...,  0.0126,  0.0143, -0.0089],
        [ 0.0160,  0.0088, -0.0036,  ...,  0.0159,  0.0122,  0.0029],
        ...,
        [ 0.0001, -0.0068,  0.0047,  ...,  0.0182, -0.0074,  0.0053],
        [ 0.0050,  0.0109,  0.0073,  ...,  0.0152, -0.0128, -0.0117],
        [ 0.0203, -0.0095, -0.0

Quantizing llm_layers:  56%|█████▋    | 18/32 [00:23<00:18,  1.33s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0094,  0.0041, -0.0059,  ...,  0.0080, -0.0037, -0.0205],
        [ 0.0116,  0.0253, -0.0293,  ..., -0.0107,  0.0094, -0.0089],
        [-0.0077, -0.0011, -0.0163,  ...,  0.0071, -0.0195,  0.0230],
        ...,
        [-0.0098,  0.0114, -0.0143,  ...,  0.0159, -0.0007,  0.0102],
        [-0.0054, -0.0063, -0.0281,  ...,  0.0031, -0.0136,  0.0090],
        [ 0.0148,  0.0217, -0.0

Quantizing llm_layers:  59%|█████▉    | 19/32 [00:25<00:17,  1.33s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0218, -0.0053,  0.0063,  ...,  0.0081, -0.0105, -0.0061],
        [-0.0157, -0.0084,  0.0170,  ..., -0.0041, -0.0197,  0.0048],
        [ 0.0070, -0.0193, -0.0005,  ..., -0.0055,  0.0065, -0.0066],
        ...,
        [-0.0053,  0.0012,  0.0129,  ..., -0.0052,  0.0117, -0.0348],
        [ 0.0283,  0.0213,  0.0325,  ..., -0.0099,  0.0107,  0.0420],
        [-0.0125, -0.0202, -0.0

Quantizing llm_layers:  62%|██████▎   | 20/32 [00:26<00:15,  1.33s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0220,  0.0114,  0.0025,  ...,  0.0238,  0.0361, -0.0075],
        [-0.0210,  0.0444, -0.0073,  ..., -0.0111, -0.0233, -0.0295],
        [-0.0444, -0.0071,  0.0216,  ..., -0.0013,  0.0060, -0.0064],
        ...,
        [-0.0065, -0.0038,  0.0049,  ..., -0.0213, -0.0233,  0.0417],
        [ 0.0070,  0.0289,  0.0159,  ...,  0.0229, -0.0245,  0.0176],
        [ 0.0231,  0.0201, -0.0

Quantizing llm_layers:  66%|██████▌   | 21/32 [00:27<00:14,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0263,  0.0399,  0.0331,  ...,  0.0036,  0.0171,  0.0132],
        [-0.0254, -0.0018, -0.0209,  ...,  0.0050,  0.0324, -0.0037],
        [-0.0004, -0.0210, -0.0122,  ...,  0.0214, -0.0281, -0.0093],
        ...,
        [-0.0107, -0.0049,  0.0084,  ..., -0.0019,  0.0070, -0.0107],
        [ 0.0103, -0.0115,  0.0141,  ...,  0.0198, -0.0007,  0.0245],
        [ 0.0231,  0.0118, -0.0

Quantizing llm_layers:  69%|██████▉   | 22/32 [00:29<00:13,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0065,  0.0090, -0.0048,  ..., -0.0086, -0.0038, -0.0112],
        [-0.0081,  0.0287, -0.0223,  ...,  0.0111,  0.0149, -0.0078],
        [ 0.0298, -0.0022, -0.0086,  ...,  0.0232, -0.0031, -0.0033],
        ...,
        [-0.0104,  0.0109,  0.0367,  ..., -0.0290,  0.0016, -0.0051],
        [-0.0128,  0.0075,  0.0096,  ..., -0.0088, -0.0126, -0.0140],
        [-0.0048, -0.0006, -0.0

Quantizing llm_layers:  72%|███████▏  | 23/32 [00:30<00:11,  1.33s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2229, 2.4235, 1.5066,  ..., 1.2677, 1.2931, 1.6046],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 2.6503e-02,  1.5489e-02, -2.4672e-02,  ...,  9.7856e-03,
         -5.4650e-04, -2.4890e-03],
        [ 9.7828e-03, -1.6087e-03, -1.1718e-02,  ..., -3.1652e-04,
         -1.8255e-02,  1.0774e-02],
        [-7.2185e-03,  6.9573e-03, -1.5942e-02,  ..., -2.2418e-03,
          3.5962e-03,  5.8434e-03],
        ...,
        [ 3.1413e-03, -2.6255e-03,  7.783

Quantizing llm_layers:  75%|███████▌  | 24/32 [00:31<00:10,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1.2216, 2.3700, 1.4751,  ..., 1.2807, 1.3036, 1.5925],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0036, -0.0050, -0.0065,  ...,  0.0017, -0.0076,  0.0054],
        [ 0.0133,  0.0094,  0.0264,  ...,  0.0246, -0.0057,  0.0027],
        [ 0.0231,  0.0084, -0.0274,  ..., -0.0150,  0.0173,  0.0054],
        ...,
        [ 0.0063,  0.0046,  0.0242,  ...,  0.0082, -0.0048, -0.0141],
        [-0.0336, -0.0074, -0.0287,  ...,  0.0014,  0.0301,  0.0161],


Quantizing llm_layers:  78%|███████▊  | 25/32 [00:33<00:09,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0062, -0.0032, -0.0245,  ..., -0.0082, -0.0113, -0.0137],
        [-0.0193, -0.0138, -0.0299,  ...,  0.0008, -0.0012, -0.0134],
        [ 0.0083, -0.0016,  0.0060,  ...,  0.0271, -0.0001,  0.0186],
        ...,
        [-0.0143,  0.0045,  0.0093,  ..., -0.0024,  0.0251,  0.0066],
        [ 0.0262,  0.0062, -0.0064,  ...,  0.0265,  0.0384,  0.0072],
        [-0.0051,  0.0276,  0.0

Quantizing llm_layers:  81%|████████▏ | 26/32 [00:34<00:07,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-5.5771e-03,  2.7893e-02,  2.6123e-02,  ..., -6.1760e-03,
         -1.3618e-02,  2.2449e-03],
        [ 1.8753e-02,  1.1391e-02,  2.0309e-02,  ...,  1.2978e-02,
         -6.7472e-04, -5.1956e-03],
        [ 1.6190e-02, -3.4821e-02,  5.0659e-03,  ...,  3.2597e-03,
          1.9665e-03,  4.0802e-02],
        ...,
        [ 1.4896e-03,  8.1482e-03, -2.1973e-03,  ...,  5.4893e-03,
     

Quantizing llm_layers:  84%|████████▍ | 27/32 [00:35<00:06,  1.31s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0114,  0.0801, -0.0231,  ...,  0.0347, -0.0163,  0.0451],
        [ 0.0302,  0.0408, -0.0253,  ...,  0.0234,  0.0280,  0.0329],
        [ 0.0010, -0.0282, -0.0411,  ...,  0.0051,  0.0058,  0.0052],
        ...,
        [-0.0257,  0.0027,  0.0139,  ..., -0.0055,  0.0070,  0.0395],
        [ 0.0179, -0.0128,  0.0786,  ..., -0.0050,  0.0548,  0.0004],
        [-0.0119,  0.0002, -0.0

Quantizing llm_layers:  88%|████████▊ | 28/32 [00:36<00:05,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0443,  0.0137,  0.0084,  ...,  0.0504, -0.0236,  0.0277],
        [-0.0019, -0.0175, -0.0388,  ...,  0.0180,  0.0457,  0.0294],
        [ 0.0074,  0.0154,  0.0018,  ..., -0.0132, -0.0334,  0.0115],
        ...,
        [-0.0076, -0.0065,  0.0263,  ..., -0.0151,  0.0310, -0.0019],
        [ 0.0110, -0.0172, -0.0009,  ..., -0.0027, -0.0115,  0.0201],
        [ 0.0117, -0.0001,  0.0

Quantizing llm_layers:  91%|█████████ | 29/32 [00:38<00:03,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0112, -0.0630, -0.0029,  ..., -0.0075, -0.0380,  0.0103],
        [-0.0163, -0.0092, -0.0046,  ..., -0.0201,  0.0094, -0.0381],
        [ 0.0037,  0.0174, -0.0392,  ..., -0.0228, -0.0547,  0.0195],
        ...,
        [-0.0166, -0.0205,  0.0046,  ...,  0.0110, -0.0224, -0.0069],
        [ 0.0069,  0.0178, -0.0355,  ..., -0.0073,  0.0269, -0.0113],
        [-0.0681,  0.0122, -0.0

Quantizing llm_layers:  94%|█████████▍| 30/32 [00:39<00:02,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-1.7029e-02,  3.5431e-02, -1.2955e-02,  ...,  3.2745e-02,
         -1.2711e-02,  3.5095e-02],
        [ 6.2744e-02,  2.1088e-02, -4.0009e-02,  ...,  6.5613e-02,
         -2.0615e-02,  8.0032e-03],
        [ 8.1062e-05, -4.0054e-03,  9.3079e-03,  ...,  6.7993e-02,
          2.8275e-02,  6.6605e-03],
        ...,
        [ 2.8595e-02,  2.3804e-02,  7.4997e-03,  ...,  1.6556e-02,
     

Quantizing llm_layers:  97%|█████████▋| 31/32 [00:40<00:01,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.8456, 1.7902, 1.4572,  ..., 1.4527, 1.2658, 1.2724],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[ 0.0218, -0.0257, -0.0276,  ..., -0.0017,  0.0001, -0.0077],
        [ 0.0186, -0.0139, -0.0032,  ...,  0.0062, -0.0143,  0.0398],
        [ 0.0229,  0.0123, -0.0166,  ..., -0.0050, -0.0067, -0.0262],
        ...,
        [ 0.0118, -0.0141, -0.0230,  ...,  0.0066,  0.0257,  0.0038],
        [ 0.0073, -0.0045, -0.0230,  ...,  0.0220,  0.0319,  0.0137],


Quantizing llm_layers: 100%|██████████| 32/32 [00:42<00:00,  1.32s/it]

--------------------------------------------------------------------------------
Parameter containing:
tensor([1., 1., 1.,  ..., 1., 1., 1.], device='cuda:0', requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([0.8034, 1.5570, 1.4686,  ..., 1.1771, 1.2233, 1.1835],
       requires_grad=True)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Parameter containing:
tensor([[-0.0473,  0.0355,  0.0174,  ..., -0.0111,  0.0204, -0.0035],
        [ 0.0108,  0.0119, -0.0045,  ...,  0.0150,  0.0060,  0.0087],
        [ 0.0720,  0.0006,  0.0039,  ...,  0.0004, -0.0162,  0.0208],
        ...,
        [ 0.0104, -0.0037,  0.0020,  ...,  0.0160, -0.0180,  0.0087],
        [ 0.0155, -0.0081,  0.0165,  ...,  0.0440, -0.0014,  0.0121],





In [15]:
scales

[]

In [30]:
layer_args['qformer_layers']

(tensor([[[[-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0.,
            -0., -0., -0., -0., -0., -0., -0., -0., -0.]]],
 
 
         [[[-0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0., -0.,
            -0., -0., -0., -0., -0., -0., -0., -0., -0.]]]], device='cuda:0'),
 None,
 tensor([[[-0.7125,  0.3542,  0.7332,  ..., -0.5284, -0.0729,  0.0879],
          [-0.9326, -0.9130, -0.3849,  ..., -0.3952, -0.8801,  0.9865],
          [-0.2113,  0.1189,  1.1975,  ...,  0.2811, -0.8168,  0.2782],
          ...,
          [ 0.2122, -0.7599, -0.0777,  ...,  0.5073,  0.3702, -0.0031],
          [ 0.6824, -0.3208,  0.6164,  ..., -0.0497,  1.5667,  0.4806],
          [ 0.7810, -0.7398,  1.0975,  ...,  0.1164, -1.0482, -0.7280]],
 
         [[ 0.0457,  0.5419,  0.7550,  ..., -0.0269, -0.6244,  0.9082],
          [-0.8845, -0.2183,  0.2585,  ...,  0.9610, -0.3391,  1.6280],
  

In [31]:
layer_kwargs['qformer_layers']

{}

In [24]:
layer_kwargs['vit_layers']

{'output_attentions': False}

In [15]:
print(linear_inputs.keys())

dict_keys(['self_attn.qkv', 'self_attn.projection', 'mlp.fc1', 'mlp.fc2'])


In [42]:
# model = Blip2ForImageTextRetrieval.from_pretrained("Salesforce/blip2-itm-vit-g", torch_dtype=torch.float16)
# processor = AutoProcessor.from_pretrained("Salesforce/blip2-itm-vit-g")
# model.to(device)

# device

In [13]:
layers['vit_layers'][0]

Blip2EncoderLayer(
  (self_attn): Blip2Attention(
    (dropout): Dropout(p=0.0, inplace=False)
    (qkv): Linear(in_features=1408, out_features=4224, bias=True)
    (projection): Linear(in_features=1408, out_features=1408, bias=True)
  )
  (layer_norm1): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
  (mlp): Blip2MLP(
    (activation_fn): GELUActivation()
    (fc1): Linear(in_features=1408, out_features=6144, bias=True)
    (fc2): Linear(in_features=6144, out_features=1408, bias=True)
  )
  (layer_norm2): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
)

In [37]:
linear_inputs['self_attn.qkv'].shape

torch.Size([2, 257, 1408])

In [20]:
first_inputs['vit_layers']

tensor([[[ 0.7319,  0.2039, -0.1177,  ...,  0.3483, -0.1315, -0.3481],
         [-0.2441,  0.8111, -0.0983,  ...,  0.0234, -0.1451, -0.7400],
         [-0.0418,  1.7882, -0.3203,  ..., -0.0219, -0.0488, -0.1417],
         ...,
         [-0.9605, -0.2865,  0.5448,  ..., -0.0762,  0.4271,  1.2226],
         [-0.6000, -0.1437,  0.1221,  ..., -0.1582,  0.1567,  1.4119],
         [-0.2725, -0.3828,  0.3872,  ...,  0.2089,  0.1482,  0.7765]],

        [[ 0.7319,  0.2039, -0.1177,  ...,  0.3483, -0.1315, -0.3481],
         [ 0.4066,  0.8089, -0.1578,  ...,  0.0691,  0.0124, -0.3917],
         [ 0.2490,  1.6557, -0.3386,  ..., -0.1113,  0.0690, -0.0610],
         ...,
         [-1.0813, -0.3526,  0.3304,  ..., -0.0324,  0.3346,  0.8685],
         [-0.5170, -0.1903, -0.0778,  ..., -0.1721,  0.1628,  1.2161],
         [-0.0057, -0.5496,  0.3215,  ...,  0.1134, -0.0510,  0.9223]]],
       device='cuda:0')

In [25]:
layer_args['vit_layers']

(None,)

In [23]:
layer_kwargs['vit_layers']

{'output_attentions': False}

In [28]:
out = layers['vit_layers'][0](first_inputs['vit_layers'], *layer_args['vit_layers'], **layer_kwargs['vit_layers'])

In [30]:
out[0]

tensor([[[ 0.3378, -0.1946, -0.0252,  ...,  0.6891, -0.1665, -0.2123],
         [-0.5796, -0.0850,  0.8516,  ...,  0.2505,  0.2570, -2.0609],
         [-0.1985,  1.0866,  0.2261,  ..., -0.4743, -0.8173, -0.4517],
         ...,
         [-1.9534, -0.5337,  0.5927,  ...,  0.0530,  0.0967,  1.0787],
         [-1.4451, -0.4682,  0.1352,  ...,  0.0619,  0.0195,  1.4128],
         [-0.4682, -1.1753,  0.9844,  ...,  0.6326,  1.3233,  0.8188]],

        [[ 0.0635, -0.3202,  0.1072,  ...,  0.6998, -0.0949, -0.1115],
         [-0.6654,  0.2025,  0.1473,  ...,  0.6025,  0.0049,  0.0392],
         [-0.9092,  1.0444, -0.1438,  ...,  0.4590,  0.1668,  0.3372],
         ...,
         [-1.1537, -1.1512,  1.0716,  ...,  0.6038, -0.2326,  0.4638],
         [-0.8844, -0.3487,  0.2901,  ...,  0.2318, -0.4852,  1.4794],
         [-0.9237, -0.8065,  0.7050,  ...,  0.6329,  0.0026,  1.1944]]],
       device='cuda:0', grad_fn=<AddBackward0>)

In [31]:
layers['vit_layers'][1](out[0], *layer_args['vit_layers'],  **layer_kwargs['vit_layers'])

(tensor([[[ 0.0026, -0.0644, -0.1888,  ...,  1.0750,  0.1186,  0.5769],
          [-1.0819,  0.1315,  0.9809,  ...,  0.7254,  0.2461, -1.5226],
          [-0.2447,  0.9629,  0.4360,  ..., -0.0313, -1.1122, -0.2950],
          ...,
          [-2.0357,  0.1081,  0.3105,  ...,  0.5053,  0.0971,  1.0850],
          [-1.2883,  0.0584, -0.2668,  ...,  0.5149,  0.1054,  1.6861],
          [-0.4440, -1.2503,  0.4363,  ...,  1.1043,  0.8403,  0.6618]],
 
         [[-0.1361,  0.0898, -0.0243,  ...,  0.9252,  0.0271,  0.6004],
          [-0.6104,  0.3142, -0.1762,  ...,  0.7918, -0.2915,  0.5835],
          [-0.9518,  0.8436, -0.6670,  ...,  0.6580,  0.1580,  0.7574],
          ...,
          [-0.8174, -0.8920,  1.0149,  ...,  0.7384,  0.2734,  0.8379],
          [-1.1361, -0.0739, -0.0643,  ...,  0.7002,  0.1879,  2.2353],
          [-0.8765, -0.5891,  0.4028,  ...,  1.1088,  0.3058,  1.6114]]],
        device='cuda:0', grad_fn=<AddBackward0>),)

In [21]:
first_inputs['qformer_layers']

tensor([[[-0.7876, -0.3205, -0.0842,  ..., -0.6614, -0.0151, -0.5240],
         [-0.0952, -0.0247,  0.3759,  ..., -0.2078,  0.4916, -0.4537],
         [-0.5563,  0.5105, -0.6659,  ..., -0.2041,  0.5277,  0.8380],
         ...,
         [-0.1493,  1.2919,  1.5551,  ...,  0.2978, -1.4789,  0.2294],
         [ 0.2532,  0.0649, -0.7901,  ..., -0.4740, -1.6942, -0.6370],
         [ 0.3278, -0.4323,  0.2681,  ..., -0.4160,  0.3958, -0.1349]],

        [[-0.7876, -0.3205, -0.0842,  ..., -0.6614, -0.0151, -0.5240],
         [-0.0952, -0.0247,  0.3759,  ..., -0.2078,  0.4916, -0.4537],
         [-0.5563,  0.5105, -0.6659,  ..., -0.2041,  0.5277,  0.8380],
         ...,
         [-0.1493,  1.2919,  1.5551,  ...,  0.2978, -1.4789,  0.2294],
         [ 0.2532,  0.0649, -0.7901,  ..., -0.4740, -1.6942, -0.6370],
         [ 0.3278, -0.4323,  0.2681,  ..., -0.4160,  0.3958, -0.1349]]],
       device='cuda:0')

In [44]:
layers['qformer_layers'][0](*first_inputs['qformer_layers'], **layer_kwargs['qformer_layers'])

(tensor([[[-0.5611, -0.4313,  0.7348,  ..., -0.5339,  0.2271, -0.6419],
          [-0.0081,  2.1908,  0.5378,  ...,  0.0602,  0.0924, -0.4073],
          [-0.3419,  0.2889, -0.3378,  ..., -0.3128,  0.8518, -0.0093],
          ...,
          [-0.4123,  1.7034, -0.6390,  ...,  0.3584, -0.3071, -0.3509],
          [-0.3916,  0.2184,  0.2309,  ..., -0.4097,  0.2242, -0.1063],
          [ 0.1907,  1.5663,  0.5063,  ..., -0.2080,  0.2138, -0.3192]],
 
         [[-0.6695, -0.3799,  0.7295,  ..., -0.4814,  0.0236, -0.6724],
          [-0.1965, -0.3873, -0.5339,  ..., -0.6227,  0.0911,  0.0212],
          [-0.8092,  0.3860, -0.2228,  ..., -0.5321, -0.2148, -0.6384],
          ...,
          [-0.1741,  0.9254, -0.0700,  ...,  0.0874, -1.2012, -0.1439],
          [-0.3797,  0.2424, -0.4833,  ..., -0.7007, -0.8832, -0.7238],
          [ 0.4638, -0.7695, -0.3554,  ..., -0.5841, -0.0452, -0.0482]]],
        device='cuda:0', grad_fn=<NativeLayerNormBackward0>),
 (tensor([[[[-1.7426,  0.2527,  1.7284,

In [18]:
layer_kwargs['qformer_layers']

{}

In [22]:
first_inputs['llm_layers']

tensor([[[-1.3969e+00,  2.4953e-01, -5.2337e-01,  ..., -9.5912e-01,
           1.3994e+00, -1.5692e-02],
         [ 1.9842e+00, -1.8033e+00, -5.4722e-01,  ..., -4.5155e-01,
          -1.1304e+00, -2.2611e-01],
         [ 8.4684e-01, -1.9760e-01, -5.8955e-01,  ...,  7.3538e-01,
           2.6387e-02, -5.0607e-01],
         ...,
         [ 7.1303e-01,  1.2223e+00,  8.5825e-01,  ...,  4.4241e+00,
          -2.8862e-01,  1.0177e+00],
         [ 2.8392e+00, -1.2753e+00, -2.6297e-01,  ..., -6.6006e-02,
          -1.6678e+00, -8.7073e-01],
         [-3.2982e-02,  3.1891e-03, -3.8714e-03,  ..., -2.6846e-02,
           1.0907e-02,  8.5526e-03]],

        [[-6.8959e-01, -4.7141e-01, -3.1821e-01,  ...,  5.0050e-01,
           1.0790e+00, -1.5016e-01],
         [ 9.6880e-01, -1.3011e+00,  1.2982e-01,  ...,  2.1108e-01,
          -1.5340e+00, -1.8530e-01],
         [ 2.3157e+00,  1.2002e+00,  8.9333e-01,  ...,  8.5588e-01,
          -9.5109e-01,  1.1636e+00],
         ...,
         [ 1.1332e+00, -5

In [19]:
layer_kwargs['llm_layers']

{'attention_mask': tensor([[[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]],
 
 
         [[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
            0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]],
        device='cuda:0'),
 'layer_head_mask': None,
 'past_key_value': (tensor([[[[ 3.4119e+00, -1.4275e+00,  1.6319e+00,  ...,  5.2653e-01,
              2.4766e+00, -1.3366e+00],
            [ 1.7859e+00, -2.7154e-01,  8.6161e-01,  ..., -8.4367e-02,
              1.5484e+00,  4.6518e-01],
            [ 1.8167e+00, -5.6734e-01,  1.4872e+00,  ..., -7.3273e-01,
              6.8322e-01, -1.6523e-01],
            ...,
            [ 2.3114e+00, -3.1942e-01, -6.0153e-01,  ..., -1.4472e+00,
              1.0140e+00, -6.6369e-01],
            [ 6.2211e-01,  4.0976e-01, -

In [161]:
calib_set = b._get_calibration_set()
calib_set.shape

torch.Size([2, 3, 224, 224])

In [164]:

inps = []
layer_kwargs = {}
# get input and kwargs to layer 0
# with_kwargs is only supported in PyTorch 2.0
# use this Catcher hack for now
class Catcher(nn.Module):
    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, *args, **kwargs):
        # assume first input to forward is hidden states
        if len(args) > 0:
            hidden_states = args[0]
            del args
        else:
            first_key = list(kwargs.keys())[0]
            hidden_states = kwargs.pop(first_key)

        inps.append(hidden_states)
        layer_kwargs.update(kwargs)
        raise ValueError  # early exit to break later inference


modules[0] = Catcher(modules[0])

try:
    model.generate(calib_set.to(next(model.parameters()).device))
except ValueError:
    pass


In [165]:
modules

ModuleList(
  (0): Catcher(
    (module): Catcher(
      (module): Catcher(
        (module): Blip2EncoderLayer(
          (self_attn): Blip2Attention(
            (dropout): Dropout(p=0.0, inplace=False)
            (qkv): Linear(in_features=1408, out_features=4224, bias=True)
            (projection): Linear(in_features=1408, out_features=1408, bias=True)
          )
          (layer_norm1): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
          (mlp): Blip2MLP(
            (activation_fn): GELUActivation()
            (fc1): Linear(in_features=1408, out_features=6144, bias=True)
            (fc2): Linear(in_features=6144, out_features=1408, bias=True)
          )
          (layer_norm2): LayerNorm((1408,), eps=1e-06, elementwise_affine=True)
        )
      )
    )
  )
  (1-38): 38 x Blip2EncoderLayer(
    (self_attn): Blip2Attention(
      (dropout): Dropout(p=0.0, inplace=False)
      (qkv): Linear(in_features=1408, out_features=4224, bias=True)
      (projection): Linea

In [166]:
inps

[tensor([[[ 0.7319,  0.2039, -0.1177,  ...,  0.3483, -0.1315, -0.3481],
          [-0.2441,  0.8111, -0.0983,  ...,  0.0234, -0.1451, -0.7400],
          [-0.0418,  1.7882, -0.3203,  ..., -0.0219, -0.0488, -0.1417],
          ...,
          [-0.9605, -0.2865,  0.5448,  ..., -0.0762,  0.4271,  1.2226],
          [-0.6000, -0.1437,  0.1221,  ..., -0.1582,  0.1567,  1.4119],
          [-0.2725, -0.3828,  0.3872,  ...,  0.2089,  0.1482,  0.7765]],
 
         [[ 0.7319,  0.2039, -0.1177,  ...,  0.3483, -0.1315, -0.3481],
          [ 0.4066,  0.8089, -0.1578,  ...,  0.0691,  0.0124, -0.3917],
          [ 0.2490,  1.6557, -0.3386,  ..., -0.1113,  0.0690, -0.0610],
          ...,
          [-1.0813, -0.3526,  0.3304,  ..., -0.0324,  0.3346,  0.8685],
          [-0.5170, -0.1903, -0.0778,  ..., -0.1721,  0.1628,  1.2161],
          [-0.0057, -0.5496,  0.3215,  ...,  0.1134, -0.0510,  0.9223]]],
        device='cuda:0')]

In [None]:
# b = Blip2ForImageTextRetrievalAWQQuantizer(model, processor, coco_dataset)
# inputs = b.quantize()


In [None]:
# TODO: exlude certain linear layers, reading from quant config

In [None]:
# TODO: solve for optimal (per input channel) scaling factor
# TODO: grid search for \alpha which balances protection of salient / non-salient weights