# SC_ASS1: FP16 vs INT8 vs INT8_VIT Benchmark

In [None]:
# %pip installs into the environment of the running kernel; `!pip` runs whichever
# pip is first on PATH, which can be a different interpreter.
%pip install -q transformers accelerate bitsandbytes datasets pyarrow pillow seaborn matplotlib tqdm codecarbon
%pip install -q git+https://github.com/salaniz/pycocoevalcap.git@master

## 1 — Configuration

In [None]:
import yaml, json, os
from datetime import datetime, timezone

# Single source of truth for the experiment. A timestamped copy is written to
# results/ so every run can be traced back to the settings that produced it.
EXP = {
    "experiment_name": "VLM_Quantization",
    "model_id": "Qwen/Qwen2.5-VL-3B-Instruct",
    "quantizer": "bitsandbytes_linear8bitlt",
    "precision": {
        "language_model": "fp16"
    },
    "dataset": {
        "hf_parquet_prefix": "hf://datasets/phiyodr/coco2017/",
        "split_file": "data/validation-00000-of-00001-e3c37e369512a3aa.parquet",
        "limit": 5000
    },
    "seed": 42,
    "inference": {
        "max_new_tokens": 40,
        "img_size": 1024,
        "batch_size": 1000,
    }
}

os.makedirs('results', exist_ok=True)
# datetime.utcnow() is deprecated since Python 3.12; an aware UTC datetime
# formats to exactly the same file-name stamp.
config_path = f'results/experiment_config_{datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")}.yaml'
with open(config_path, 'w') as f:
    yaml.safe_dump(EXP, f)
print('Saved experiment config to', config_path)


## 2 — Utilities (image loader, timing, VRAM helpers)

In [None]:
import time
import torch
import random
import requests
from PIL import Image
from tqdm import tqdm
from io import BytesIO
import os

# Seed Python's and PyTorch's random number generators from the experiment config.
random.seed(EXP['seed'])
torch.manual_seed(EXP['seed'])

def load_image(path, max_size=1024, timeout=30):
    """Load an image as RGB and shrink it so neither side exceeds ``max_size``.

    Args:
        path: An ``http(s)://`` URL, a local file path, or an already loaded
            ``PIL.Image.Image``.
        max_size: Maximum width/height in pixels. The aspect ratio is kept and
            images that already fit are never upsampled.
        timeout: Seconds to wait for the HTTP server before giving up, so one
            stalled download cannot hang a whole evaluation run.

    Returns:
        A ``PIL.Image.Image`` in RGB mode.

    Raises:
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        OSError: If the file is missing or cannot be decoded as an image.
    """
    if isinstance(path, Image.Image):
        img = path.convert("RGB")
    elif isinstance(path, str) and path.startswith(("http://", "https://")):
        response = requests.get(path, timeout=timeout)
        # Fail loudly on 4xx/5xx instead of handing an HTML error page to PIL.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert("RGB")
    else:
        with Image.open(path) as local_file:
            img = local_file.convert("RGB")

    w, h = img.size
    scale = min(max_size / w, max_size / h, 1.0)  # don't upsample
    if scale < 1.0:
        # Clamp to 1 px so extreme aspect ratios cannot round a side down to 0.
        new_w = max(1, int(w * scale))
        new_h = max(1, int(h * scale))
        img = img.resize((new_w, new_h))
    return img

def reset_cuda_stats():
    """Clear PyTorch's peak-memory counters; does nothing when CUDA is unavailable."""
    if not torch.cuda.is_available():
        return
    torch.cuda.reset_peak_memory_stats()

def get_peak_vram_mib():
    """Return the peak allocated CUDA memory in MiB, or 0.0 when CUDA is unavailable."""
    if not torch.cuda.is_available():
        return 0.0
    peak_bytes = torch.cuda.max_memory_allocated()
    return peak_bytes / 1024**2

def get_caption(result):
    """Extract the assistant's text from a pipeline result.

    Args:
        result: Sequence of output dicts; only the first element is read. Its
            ``"generated_text"`` may be a chat transcript (a list of
            ``{"role": ..., "content": ...}`` dicts) or a plain string.

    Returns:
        The stripped caption, or ``None`` when no assistant text is present.
    """
    if not result:
        return None

    first = result[0]
    generated = first.get("generated_text", []) if isinstance(first, dict) else None
    if not generated:
        return None

    # A plain string has no roles to search through; it is the caption itself.
    if isinstance(generated, str):
        return generated.strip()

    # Look for the first assistant entry that carries text.
    for entry in generated:
        if not isinstance(entry, dict) or entry.get("role") != "assistant":
            continue
        content = entry.get("content")
        if isinstance(content, str):
            return content.strip()
        if isinstance(content, list):
            # Multi-part content: return the first part typed as text.
            for part in content:
                if isinstance(part, dict) and part.get("type") == "text" and "text" in part:
                    return part["text"].strip()
    return None


def get_model_size(model):
    """Return the storage taken by the model's parameters, in MiB (bytes / 1024**2).

    Only tensors yielded by ``model.parameters()`` are counted.
    """
    byte_count = sum(p.numel() * p.element_size() for p in model.parameters())
    return byte_count / (1024**2)


## 3 — Dataset loader
This cell loads the parquet split named in `EXP['dataset']`, either from the Hugging Face Hub (an `hf://` path) or from a local path.

In [None]:
from datasets import load_dataset

# Build the parquet location from the config (prefix + split file) and load it.
parquet_path = EXP['dataset']['hf_parquet_prefix'] + EXP['dataset']['split_file']
print('Loading dataset from', parquet_path)
try:
    # NOTE(review): split='train' is presumably just the name the parquet loader
    # gives to bare data_files; the configured file is the validation split.
    ds = load_dataset('parquet', data_files=parquet_path, split='train')
except Exception as e:
    # Print a hint for the reader, then re-raise so the notebook stops here.
    print('Failed to load via datasets.load_dataset:', e)
    print('Try providing a local path to the parquet file instead.')
    raise

print('Dataset length:', len(ds))
print('Columns:', ds.column_names)

# Show one row so the field names used later can be checked by eye.
print('Example row:', ds[0])


## 4 — Model builders and quantization (bitsandbytes vision-only)
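Three builders are defined: an FP16 baseline, a fully INT8 model (bitsandbytes `load_in_8bit`), and a vision-only INT8 variant that loads the full model in FP16 and then replaces the linear layers in the vision encoder with `bnb.nn.Linear8bitLt`.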

In [None]:
from transformers import pipeline, BitsAndBytesConfig, AutoModelForImageTextToText, AutoProcessor
import bitsandbytes as bnb
import torch.nn as nn

# Default checkpoint for the builders below, taken from the experiment config.
MODEL_ID = EXP['model_id']
# Used only for the log line; the builders place weights via device_map="auto".
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Loading models on', device)

def build_fp16_model(model_id=MODEL_ID):
    """Build the unquantized FP16 baseline as an image-text-to-text pipeline.

    Args:
        model_id: Hugging Face model identifier to load.

    Returns:
        A ``transformers`` pipeline wrapping the model and its processor.
    """
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModelForImageTextToText.from_pretrained(
        model_id,
        device_map="auto",
        # float16, not bfloat16: this is the "FP16" baseline and both quantized
        # variants load with float16, so the comparison must share one dtype.
        dtype=torch.float16,
    )

    pipe = pipeline(
        "image-text-to-text",
        model=model,
        processor=processor,
        device_map="auto",
    )

    return pipe

def build_int8_model(model_id=MODEL_ID):
    """Build the fully INT8 variant (bitsandbytes ``load_in_8bit``) as a pipeline.

    Args:
        model_id: Hugging Face model identifier to load.

    Returns:
        A ``transformers`` image-text-to-text pipeline around the quantized model.
    """
    quant_cfg = BitsAndBytesConfig(load_in_8bit=True)

    caption_processor = AutoProcessor.from_pretrained(model_id)
    int8_model = AutoModelForImageTextToText.from_pretrained(
        model_id,
        quantization_config=quant_cfg,
        device_map="auto",
        dtype=torch.float16,
    )

    return pipeline(
        "image-text-to-text",
        model=int8_model,
        processor=caption_processor,
        device_map="auto",
    )

def quantize_visual_mlp_to_8bit(model, threshold=0.0):
    """Replace the vision encoder's ``nn.Linear`` layers with INT8 bitsandbytes layers.

    Layers are selected by name: every ``nn.Linear`` whose qualified module name
    contains ``"vision_tower"`` or ``"visual"`` is swapped in place for a
    ``bnb.nn.Linear8bitLt``. All other modules are left untouched.

    Args:
        model: Loaded model. Each selected layer is re-created on the device its
            weight currently lives on.
        threshold: Outlier threshold forwarded to ``Linear8bitLt``. The default
            of 0.0 is the layer's own default.

    Returns:
        The same ``model`` object, modified in place.
    """
    quantized_count = 0

    for name, module in list(model.named_modules()):
        in_vision_tower = "vision_tower" in name or "visual" in name
        if not (in_vision_tower and isinstance(module, nn.Linear)):
            continue
        # Linear8bitLt subclasses nn.Linear; skip layers converted by a previous
        # call so already-quantized int8 data is never quantized a second time.
        if isinstance(module, bnb.nn.Linear8bitLt):
            continue

        # Walk down to the parent so the attribute can be rebound on it.
        *parent_path, attr_name = name.split(".")
        parent = model
        for part in parent_path:
            parent = getattr(parent, part)

        target_device = module.weight.device

        # has_fp16_weights=False is the inference mode: weights are stored as
        # int8. With True the layer keeps the full FP16 weights, so nothing is
        # actually compressed.
        new_linear = bnb.nn.Linear8bitLt(
            module.in_features,
            module.out_features,
            bias=module.bias is not None,
            has_fp16_weights=False,
            threshold=threshold,
        )

        # Load the float weights first, then move: bitsandbytes quantizes the
        # weight when the layer is moved to the accelerator. Moving before the
        # copy would quantize the random initial weights instead.
        new_linear.load_state_dict(module.state_dict())
        new_linear = new_linear.to(target_device)
        if module.bias is not None:
            # Keep the bias in the source dtype rather than the float32 init dtype.
            new_linear.bias.data = new_linear.bias.data.to(module.bias.dtype)

        # Replace layer
        setattr(parent, attr_name, new_linear)
        quantized_count += 1

    print(f"Quantized {quantized_count} Linear layers in visual tower to 8-bit using bitsandbytes.")
    return model

def build_vit_int8_model(model_id=MODEL_ID):
    """Build the vision-only INT8 variant as an image-text-to-text pipeline.

    The model is loaded in float16 and then passed through
    ``quantize_visual_mlp_to_8bit``.

    Args:
        model_id: Hugging Face model identifier to load. Defaults to
            ``MODEL_ID``, like the other two builders.

    Returns:
        A ``transformers`` pipeline wrapping the modified model and its processor.
    """
    processor = AutoProcessor.from_pretrained(model_id)
    model = AutoModelForImageTextToText.from_pretrained(
        model_id,
        dtype=torch.float16,
        device_map="auto"
    )

    model = quantize_visual_mlp_to_8bit(model)

    pipe = pipeline(
        "image-text-to-text",
        model=model,
        processor=processor,
    )

    return pipe

print('Model builder ready')


## 5 — Inference helper (image-text-to-text pipeline)

In [None]:
def generate_caption_with_model(pipe, image, prompt_text="Give a short caption.", max_new_tokens=24):
    """Ask the pipeline to caption one image.

    Args:
        pipe: Callable pipeline; invoked with a one-turn chat and ``max_new_tokens``.
        image: Image object placed in the user turn.
        prompt_text: Instruction text sent alongside the image.
        max_new_tokens: Generation budget forwarded to the pipeline.

    Returns:
        Whatever ``get_caption`` extracts from the pipeline output.
    """
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": prompt_text},
        ],
    }

    # inference_mode disables autograd bookkeeping for the forward passes.
    with torch.inference_mode():
        raw_output = pipe([user_turn], max_new_tokens=max_new_tokens)

    return get_caption(raw_output)

print('Inference helper ready.')

## 6 — Evaluation loop (runs over dataset, records latency, peak VRAM)

In [None]:
def evaluate_full(dataset, pipe, img_field='coco_url', refs_field='captions',
                  max_items=None, img_size=1024, max_new_tokens=24):
    """Caption the first ``n`` rows of ``dataset`` and record per-image metrics.

    Args:
        dataset: Indexable collection of rows (dicts or dict-like); pandas
            DataFrames are read positionally through ``.iloc``.
        pipe: Pipeline passed on to ``generate_caption_with_model``.
        img_field: Row key holding the image location.
        refs_field: Row key holding the reference captions.
        max_items: Optional cap on the number of rows; ``None`` means all rows.
        img_size: Maximum image side passed to ``load_image``.
        max_new_tokens: Generation budget per caption.

    Returns:
        A list of dicts with keys ``image_id`` (row position within
        ``dataset``), ``references``, ``pred``, ``latency_s`` and
        ``peak_vram_mib``. Rows whose image could not be loaded or captioned
        are reported and left out.
    """
    results = []
    length = len(dataset)
    n = length if max_items is None else min(length, max_items)
    print(f"Evaluating {n} images...")

    # dataset[i] on a DataFrame selects a column, not a row.
    is_frame = hasattr(dataset, "iloc")

    for i in tqdm(range(n), desc='Eval'):
        row = dataset.iloc[i] if is_frame else dataset[i]

        # Load references
        refs = row.get(refs_field) if isinstance(row, dict) else row[refs_field]

        try:
            # Loading sits inside the try so a single failed download skips
            # this image instead of aborting the whole batch.
            image_input = row.get(img_field) if isinstance(row, dict) else row[img_field]
            img = load_image(image_input, img_size)

            # Reset GPU stats, then time only the generation (not the download).
            reset_cuda_stats()
            # perf_counter is monotonic; time.time() can jump with clock changes.
            t0 = time.perf_counter()
            pred = generate_caption_with_model(pipe, img, max_new_tokens=max_new_tokens)
            latency = time.perf_counter() - t0
        except Exception as e:
            print(f"Skipping image {i} due to error: {e}")
            continue

        peak = get_peak_vram_mib()

        # Store results
        results.append({
            'image_id': i,
            'references': [str(r) for r in refs] if refs is not None else [],
            'pred': str(pred),
            'latency_s': latency,
            'peak_vram_mib': peak,
        })

    print(f"Completed {len(results)} image evaluations.")
    return results

print('Evaluation loop ready.')

## 7 — CIDEr metric computation

In [None]:
from pycocoevalcap.cider.cider import Cider

def compute_cider(results, normalize=True):
    """Compute the corpus-level CIDEr score for a list of evaluation records.

    Args:
        results: Non-empty list of records with ``"references"`` (list of
            captions) and ``"pred"`` (predicted caption).
        normalize: When true, captions are lower-cased and punctuation is
            stripped from the edges of each token before scoring. This is a
            light stand-in for the tokenizer step of the standard COCO caption
            evaluation; without it "Dog." and "dog" count as different words.
            Pass ``False`` to score the raw strings.

    Returns:
        The CIDEr score.

    Raises:
        ValueError: If ``results`` is empty.
    """
    import string

    if not results:
        raise ValueError("compute_cider() needs at least one result")

    def _prepare(text):
        text = str(text)
        if not normalize:
            return text
        tokens = (tok.strip(string.punctuation) for tok in text.lower().split())
        return " ".join(tok for tok in tokens if tok)

    # Both dicts must share the same keys; the record position serves as the id.
    gts = {i: [_prepare(r) for r in item["references"]] for i, item in enumerate(results)}
    res = {i: [_prepare(item["pred"])] for i, item in enumerate(results)}

    cider = Cider()
    score, _individual_scores = cider.compute_score(gts, res)

    print(f"CIDEr: {score:.4f}")
    return score

## 8 — Plotting (seaborn)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import re

sns.set(style='whitegrid')

def plot_comparison(fp16_metrics, int8_metrics, int8_vit_metrics, out_prefix='results/plot'):
    """Save per-image latency and peak-VRAM boxplots for the three model variants.

    Args:
        fp16_metrics: Per-image records for the FP16 run.
        int8_metrics: Per-image records for the INT8 run.
        int8_vit_metrics: Per-image records for the vision-only INT8 run.
        out_prefix: Path prefix; ``_latency.png`` and ``_vram.png`` are appended.
    """
    # One frame per variant, tagged with its mode label, then stacked.
    tagged_frames = []
    for label, records in (('fp16', fp16_metrics),
                           ('int8', int8_metrics),
                           ('vit_int8', int8_vit_metrics)):
        frame = pd.DataFrame(records)
        frame['mode'] = label
        tagged_frames.append(frame)
    df = pd.concat(tagged_frames, ignore_index=True)

    # Values read back from CSV may be strings; unparsable entries become NaN.
    for column in ('latency_s', 'peak_vram_mib'):
        df[column] = pd.to_numeric(df[column], errors='coerce')

    panels = (
        ('latency_s', 'Latency per image (s)', '_latency.png'),
        ('peak_vram_mib', 'Peak VRAM per image (MiB)', '_vram.png'),
    )
    for column, title, suffix in panels:
        plt.figure(figsize=(8,4))
        ax = sns.boxplot(x='mode', y=column, data=df)
        ax.set_title(title)
        plt.savefig(out_prefix + suffix, dpi=200)
        plt.close()

    print('Saved plots with prefix', out_prefix)

print('Plotting utilities ready.')

In [None]:
def plot_summary_metrics(summary, out_prefix='results/summary'):
    """Save one annotated bar chart per summary metric (FP16 vs INT8 vs INT8_VIT).

    Args:
        summary: Mapping that contains the keys listed in ``metrics_info``.
        out_prefix: Path prefix; a sanitized metric name and ``.png`` are appended.
    """
    # Chart title -> summary keys in (FP16, INT8, INT8_VIT) order.
    metrics_info = {
        'CIDEr': ('cider_fp16', 'cider_int8', 'cider_int8_vit'),
        'Model Size (MB)': ('model_size_fp16_mb', 'model_size_int8_mb', 'model_size_int8_vit_mb'),
        'Throughput (img/s)': ('throughput_fp16_img_s', 'throughput_int8_img_s', 'throughput_int8_vit_img_s'),
        # Energy chart currently disabled:
        #'Energy (Wh)': ('energy_kwh_fp16', 'energy_kwh_int8', 'energy_kwh_int8_vit')
    }
    mode_labels = ['FP16', 'INT8', 'INT8_VIT']

    for metric_name, summary_keys in metrics_info.items():
        bar_values = [summary[key] for key in summary_keys]

        # Energy is stored in kWh but plotted in Wh, with more decimals.
        is_energy = 'Energy' in metric_name
        if is_energy:
            bar_values = [value * 1000 for value in bar_values]
        fmt = '{:.7f}' if is_energy else '{:.3f}'

        df = pd.DataFrame({'Mode': mode_labels, 'Value': bar_values})

        plt.figure(figsize=(5,4))
        ax = sns.barplot(x='Mode', y='Value', hue='Mode', data=df, palette='pastel', dodge=False, legend=False)
        ax.set_title(metric_name)

        # Write each bar's value just above it.
        for bar in ax.patches:
            height = bar.get_height()
            ax.annotate(fmt.format(height),
                        (bar.get_x() + bar.get_width() / 2., height),
                        ha='center', va='bottom', fontsize=10)

        plt.tight_layout()
        # Collapse anything outside [A-Za-z0-9_] so the title is a safe file name.
        safe_name = re.sub(r'[^A-Za-z0-9_]+', '_', metric_name)
        file_path = f"{out_prefix}_{safe_name}.png"
        plt.savefig(file_path, dpi=200)
        plt.close()

        print(f"Saved plot for {metric_name}: {file_path}")

## 9 — Orchestrator

In [None]:
import os, csv, time, gc, torch
from codecarbon import EmissionsTracker

def run_inference_batch(
    batch_id,
    model_name,
    pipe,
    dataset,
    batch_size,
    exp_cfg,
    img_field,
    refs_field
):
    """Evaluate one contiguous batch of the dataset and cache the result as CSV.

    Batch ``batch_id`` (1-based) covers rows
    ``[(batch_id - 1) * batch_size, batch_id * batch_size)``, capped at the
    dataset length. If the batch's CSV already exists, nothing is run.

    Args:
        batch_id: 1-based batch number.
        model_name: Label used (lower-cased) in the output file names.
        pipe: Pipeline passed to ``evaluate_full``.
        dataset: pandas DataFrame, list, or object with ``select`` (Hugging Face
            dataset).
        batch_size: Number of rows per batch.
        exp_cfg: Mapping providing ``"img_size"`` and ``"max_new_tokens"``.
        img_field: Row key holding the image location.
        refs_field: Row key holding the reference captions.

    Returns:
        Path of the batch CSV.

    Raises:
        ValueError: If ``batch_id`` or ``batch_size`` is below 1, or the batch
            starts beyond the end of the dataset.
        TypeError: If ``dataset`` is of an unsupported type.
    """
    if batch_id < 1 or batch_size < 1:
        raise ValueError(f"batch_id and batch_size must be >= 1 (got {batch_id}, {batch_size})")

    os.makedirs("results", exist_ok=True)

    # Calculate indices for this batch
    start_idx = (batch_id - 1) * batch_size
    end_idx = min(start_idx + batch_size, len(dataset))
    csv_path = f"results/{model_name.lower()}_batch_{batch_id:02d}.csv"

    # Skip if already exists
    if os.path.exists(csv_path):
        print(f"Skipping batch {batch_id}: {csv_path} already exists.")
        return csv_path

    # An empty slice would write a header-only CSV that later runs treat as done.
    if start_idx >= end_idx:
        raise ValueError(
            f"Batch {batch_id} starts at row {start_idx}, beyond the dataset ({len(dataset)} rows)."
        )

    print(f"Running {model_name} batch {batch_id}: {start_idx}–{end_idx}")

    if hasattr(dataset, "iloc"):
        # Pandas DataFrame
        data_slice = dataset.iloc[start_idx:end_idx]
    elif isinstance(dataset, list):
        data_slice = dataset[start_idx:end_idx]
    elif hasattr(dataset, "select"):
        # Hugging Face Dataset
        data_slice = dataset.select(range(start_idx, end_idx))
    else:
        raise TypeError(f"Unsupported dataset type: {type(dataset)}")

    log_path = f"results/{model_name.lower()}_n_samples_{len(data_slice)}_emissions.csv"
    tracker = EmissionsTracker(
        project_name=f"{model_name}_n_samples_{len(data_slice)}",
        output_dir="results",
        output_file=os.path.basename(log_path),
        save_to_file=True
    )
    tracker.start()

    # Inference loop; the tracker is stopped even if evaluation raises.
    t0 = time.perf_counter()
    try:
        batch_results = evaluate_full(
            data_slice,
            pipe,
            img_field=img_field,
            refs_field=refs_field,
            max_items=None,
            img_size=exp_cfg["img_size"],
            max_new_tokens=exp_cfg["max_new_tokens"],
        )
        t1 = time.perf_counter()
    finally:
        tracker.stop()

    # evaluate_full numbers rows from 0 within the slice; shift to positions in
    # the full dataset so ids stay unique once the batch CSVs are merged.
    for record in batch_results:
        record["image_id"] += start_idx

    # Save batch results (explicit UTF-8 so captions round-trip on any platform).
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["image_id", "references", "pred", "latency_s", "peak_vram_mib"])
        writer.writeheader()
        writer.writerows(batch_results)

    # Free memory
    gc.collect()
    torch.cuda.empty_cache()

    print(f"Saved {len(batch_results)} results → {csv_path} ({t1 - t0:.1f}s)")
    return csv_path

## 10 — Run the experiment

In [None]:
import os, gc, csv, time, torch, pandas as pd
from datetime import datetime

# Shorthand handles for the config values used by the run cells below.
exp_cfg = EXP["inference"]
model_id = EXP["model_id"]
limit = EXP["dataset"]["limit"]
# NOTE(review): EXP["inference"]["batch_size"] is not read here; with the
# configured limit of 5000 both approaches give 1000.
batch_size = limit // 5  # divide dataset into 5 equal parts
# Dataset columns holding the image URL and the reference captions.
IMG_FIELD, REF_FIELD = "coco_url", "captions"

os.makedirs("results", exist_ok=True)
print(f"Setup complete. Processing {limit} images in 5 batches of {batch_size}.")

In [None]:
# Build the FP16 pipeline and record its parameter size for the summary cell.
print("Building FP16 model …")
pipe_fp16 = build_fp16_model(model_id)
fp16_size = get_model_size(pipe_fp16.model)
print(f"FP16 model size: {fp16_size:.1f} MB")

In [None]:
# FP16, batch 1 of 5: rows [0, batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=1,
    model_name="FP16",
    pipe=pipe_fp16,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# FP16, batch 2 of 5: rows [batch_size, 2 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=2,
    model_name="FP16",
    pipe=pipe_fp16,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# FP16, batch 3 of 5: rows [2 * batch_size, 3 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=3,
    model_name="FP16",
    pipe=pipe_fp16,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# FP16, batch 4 of 5: rows [3 * batch_size, 4 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=4,
    model_name="FP16",
    pipe=pipe_fp16,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# FP16, batch 5 of 5: rows [4 * batch_size, 5 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=5,
    model_name="FP16",
    pipe=pipe_fp16,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# Drop the FP16 pipeline and hand its cached GPU memory back before the next
# model is loaded; `del` alone leaves the blocks in PyTorch's CUDA cache.
del pipe_fp16
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Build the fully INT8 pipeline and record its parameter size for the summary cell.
print("Building INT8 model …")
pipe_int8 = build_int8_model(model_id)
int8_size = get_model_size(pipe_int8.model)
print(f"INT8 model size: {int8_size:.1f} MB")

In [None]:
# INT8, batch 1 of 5: rows [0, batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=1,
    model_name="INT8",
    pipe=pipe_int8,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8, batch 2 of 5: rows [batch_size, 2 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=2,
    model_name="INT8",
    pipe=pipe_int8,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8, batch 3 of 5: rows [2 * batch_size, 3 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=3,
    model_name="INT8",
    pipe=pipe_int8,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8, batch 4 of 5: rows [3 * batch_size, 4 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=4,
    model_name="INT8",
    pipe=pipe_int8,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8, batch 5 of 5: rows [4 * batch_size, 5 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=5,
    model_name="INT8",
    pipe=pipe_int8,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# Drop the INT8 pipeline and hand its cached GPU memory back before the next
# model is loaded; `del` alone leaves the blocks in PyTorch's CUDA cache.
del pipe_int8
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Build the vision-only INT8 pipeline and record its parameter size for the summary cell.
print("Building INT8 VIT model …")
pipe_int8_vit = build_vit_int8_model(model_id)
int8_vit_size = get_model_size(pipe_int8_vit.model)
print(f"INT8 VIT model size: {int8_vit_size:.1f} MB")

In [None]:
# INT8_VIT, batch 1 of 5: rows [0, batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=1,
    model_name="INT8_VIT",
    pipe=pipe_int8_vit,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8_VIT, batch 2 of 5: rows [batch_size, 2 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=2,
    model_name="INT8_VIT",
    pipe=pipe_int8_vit,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8_VIT, batch 3 of 5: rows [2 * batch_size, 3 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=3,
    model_name="INT8_VIT",
    pipe=pipe_int8_vit,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8_VIT, batch 4 of 5: rows [3 * batch_size, 4 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=4,
    model_name="INT8_VIT",
    pipe=pipe_int8_vit,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# INT8_VIT, batch 5 of 5: rows [4 * batch_size, 5 * batch_size). Skipped if its CSV already exists.
run_inference_batch(
    batch_id=5,
    model_name="INT8_VIT",
    pipe=pipe_int8_vit,
    dataset=ds,
    batch_size=batch_size,
    exp_cfg=EXP["inference"],
    img_field=IMG_FIELD,
    refs_field=REF_FIELD
)

In [None]:
# Drop the vision-only INT8 pipeline and release its cached GPU memory;
# `del` alone leaves the blocks in PyTorch's CUDA cache.
del pipe_int8_vit
gc.collect()
torch.cuda.empty_cache()

## 11 — Compute metrics and compare

In [None]:
import glob, ast, re, pandas as pd

def merge_batches(prefix):
    """Merge all ``results/{prefix}_batch_*.csv`` files and summarise them.

    Args:
        prefix: Lower-case model label used in the batch file names.

    Returns:
        A dict with ``"results"`` (list of per-image records),
        ``"avg_latency_s"`` (mean latency) and ``"throughput_img_s"`` (images
        divided by summed latency).

    Raises:
        FileNotFoundError: If no batch file matches the prefix.
        ValueError: If the matching files contain no rows.
    """
    files = sorted(glob.glob(f"results/{prefix}_batch_*.csv"))
    if not files:
        raise FileNotFoundError(f"No batch files found for prefix '{prefix}'")

    print(f"Merging {len(files)} batch files for '{prefix}'...")
    df = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)
    if df.empty:
        # Header-only files would otherwise yield NaN latency and throughput.
        raise ValueError(f"Batch files for prefix '{prefix}' contain no rows")

    # The CSV stores each reference list as its Python repr; turn it back into
    # a list of strings, falling back to a one-element list.
    def parse_refs(ref):
        if isinstance(ref, str):
            try:
                parsed = ast.literal_eval(ref)
            except Exception:
                return [ref]
            if isinstance(parsed, list):
                return [str(x) for x in parsed]
        return [str(ref)]

    df["references"] = df["references"].apply(parse_refs)

    # read_csv turns empty cells (and, depending on the pandas version, the
    # text "None") into NaN; downstream str() would make that the caption "nan".
    df["pred"] = df["pred"].fillna("").astype(str)

    avg_latency = df["latency_s"].mean()
    throughput = len(df) / df["latency_s"].sum()

    print(f"Merged {len(df)} results. Avg latency: {avg_latency:.2f}s, throughput: {throughput:.3f} img/s")

    return {
        "results": df.to_dict(orient="records"),
        "avg_latency_s": avg_latency,
        "throughput_img_s": throughput
    }

In [None]:
# Collect every FP16 batch CSV into one result set.
fp16_data = merge_batches("fp16")
print(f"Merged FP16 results → {len(fp16_data['results'])} samples")

In [None]:
# Collect every INT8 batch CSV into one result set.
int8_data = merge_batches("int8")
print(f"Merged INT8 results → {len(int8_data['results'])} samples")

In [None]:
# Collect every INT8_VIT batch CSV into one result set.
int8_vit_data = merge_batches("int8_vit")
print(f"Merged INT8 VIT results → {len(int8_vit_data['results'])} samples")

In [None]:
# Score each variant, plot the per-image distributions, and write one JSON summary.
print("Computing CIDEr scores...")
cider_fp16 = compute_cider(fp16_data["results"])
cider_int8 = compute_cider(int8_data["results"])
cider_int8_vit = compute_cider(int8_vit_data["results"])

print(f"CIDEr FP16: {cider_fp16:.3f} | INT8: {cider_int8:.3f} | INT8-VIT: {cider_int8_vit:.3f}")

plot_comparison(fp16_data["results"], int8_data["results"], int8_vit_data["results"])
print("Comparison plot generated.")

# The model sizes come from the build cells above, so those cells must have
# run in this kernel session.
summary = {
    "cider_fp16": cider_fp16,
    "cider_int8": cider_int8,
    "cider_int8_vit": cider_int8_vit,
    "delta_cider_int8": cider_int8 - cider_fp16,
    "delta_cider_int8_vit": cider_int8_vit - cider_fp16,
    "avg_latency_fp16_s": fp16_data["avg_latency_s"],
    "avg_latency_int8_s": int8_data["avg_latency_s"],
    "avg_latency_int8_vit_s": int8_vit_data["avg_latency_s"],
    "throughput_fp16_img_s": fp16_data["throughput_img_s"],
    "throughput_int8_img_s": int8_data["throughput_img_s"],
    "throughput_int8_vit_img_s": int8_vit_data["throughput_img_s"],
    "model_size_fp16_mb": fp16_size,
    "model_size_int8_mb": int8_size,
    "model_size_int8_vit_mb": int8_vit_size,
    # Sample count of the FP16 run only; the other runs may differ if images were skipped.
    "n_samples": len(fp16_data["results"]),
    # datetime.utcnow() is deprecated since Python 3.12; an aware UTC datetime
    # with "+00:00" rewritten to "Z" yields the same timestamp format.
    "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
}

plot_summary_metrics(summary)

summary_path = f"results/summary_{datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')}.json"
with open(summary_path, "w") as f:
    json.dump(summary, f, indent=2)

print(f"Summary saved to: {summary_path}")

## Captioning Examples

In [None]:
import random
import matplotlib.pyplot as plt

def show_random_captions(pipe, dataset, img_field, refs_field, n_samples=3):
    """Display random dataset images with the predicted and reference captions.

    Args:
        pipe: Pipeline passed to ``generate_caption_with_model``.
        dataset: Indexable collection of rows; must not be empty.
        img_field: Row key holding the image location.
        refs_field: Row key holding the reference captions.
        n_samples: Number of random rows to show. Rows are drawn independently,
            so repeats are possible.
    """
    import ast

    for _ in range(n_samples):
        idx = random.randint(0, len(dataset) - 1)
        row = dataset[idx]
        is_mapping = isinstance(row, dict)

        image_path = row.get(img_field) if is_mapping else row[img_field]
        try:
            img = load_image(image_path)
        except Exception as e:
            # One unreachable image should not end the demonstration.
            print(f"Error loading image {idx}: {e}")
            continue

        plt.imshow(img)
        plt.axis("off")
        plt.title(f"Image {idx}")
        plt.show()

        references = row.get(refs_field, []) if is_mapping else row[refs_field]
        if isinstance(references, str):
            # literal_eval parses a list literal without executing arbitrary
            # code, unlike eval() on text that comes from the dataset.
            try:
                references = ast.literal_eval(references)
            except (ValueError, SyntaxError):
                references = [references]

        try:
            caption = generate_caption_with_model(pipe, img)
        except Exception as e:
            print(f"Error generating caption: {e}")
            continue

        if caption is None:
            print("Error generating caption: model returned no text")
            continue

        # Lower-case and collapse whitespace for display.
        pred = caption.lower().split()
        print(f"Predicted Caption:\n{' '.join(pred)}\n")

        print(f"References: {references}\n")

### FP16 Model

In [None]:
# Reuse the FP16 pipeline if it is still defined; otherwise build it again.
if "pipe_fp16" not in globals():
    pipe_fp16 = build_fp16_model(EXP["model_id"])
show_random_captions(pipe_fp16, ds, IMG_FIELD, REF_FIELD)

### INT8

In [None]:
# Reuse the INT8 pipeline if it is still defined; otherwise build it again.
if "pipe_int8" not in globals():
    pipe_int8 = build_int8_model(EXP["model_id"])
show_random_captions(pipe_int8, ds, IMG_FIELD, REF_FIELD)

### INT8_VIT

In [None]:
# Reuse the vision-only INT8 pipeline if it is still defined; otherwise build it again.
if "pipe_int8_vit" not in globals():
    pipe_int8_vit = build_vit_int8_model(EXP["model_id"])
show_random_captions(pipe_int8_vit, ds, IMG_FIELD, REF_FIELD)