In [None]:
from blip_quantizer import BlipQuantizer, QuantConfig, ModelPart, LayerGroup, LayerType
from quant_functions import uniform_quantization
import torch
from transformers import Blip2Processor, Blip2ForConditionalGeneration
from datasets import COCODataset
from tqdm import tqdm
from PIL import Image
from torch.utils.data import DataLoader
from utils import print_model_structure

# Use the GPU when one is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pretrained BLIP-2 checkpoint and place it on the selected device.
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b").to(device)

quantizer = BlipQuantizer(model)

# One config per model part. Every config targets LayerGroup.MIDDLE /
# LayerType.MLP with uniform quantization; only the bit width differs.
configs = [
    QuantConfig(
        part,
        LayerGroup.MIDDLE,
        LayerType.MLP,
        uniform_quantization,
        num_bits=bits,
    )
    for part, bits in (
        (ModelPart.VIT, 8),
        (ModelPart.QFORMER, 4),
        (ModelPart.LLM, 4),
    )
]

print("Quantizing model...")
quantizer.apply_quantization(configs)

# Uncomment to inspect the module tree after quantization:
# print_model_structure(model)

In [None]:
from evaluation_pipeline import EvaluationPipeline

# COCO val2017 captions and images, read from the local ./data directory.
coco_dataset = COCODataset(
    ann_file="./data/coco/annotations/captions_val2017.json",
    img_dir="./data/coco/val2017",
)

# The processor must come from the same checkpoint as the model.
model_name = "Salesforce/blip2-opt-2.7b"
processor = Blip2Processor.from_pretrained(model_name)

# Create evaluator around the (already quantized) model.
evaluator = EvaluationPipeline(model, processor, device)

# Evaluate on at most 1000 samples.
print("Starting evaluation...")
coco_results = evaluator.evaluate(coco_dataset, task="image_captioning", max_samples=1000)

# Save results
evaluator.save_results(coco_results, "./results/coco_quantized_evaluation.json")

# Print overall CIDEr score
print(f"COCO CIDEr score: {coco_results['overall_cider']}")

# Print a few example predictions. The count is capped by the number of
# predictions actually produced so a short run (small dataset or a lowered
# max_samples) does not raise IndexError.
print("\nExample predictions:")
num_examples = min(5, len(coco_results["predictions"]))
for i in range(num_examples):
    prediction = coco_results["predictions"][i]
    print(f"Image ID: {prediction['image_id']}")
    print(f"Prediction: {prediction['caption']}")
    print(f"References: {coco_results['references'][i]}")
    print(f"Individual CIDEr score: {coco_results['individual_cider'][i]}")
    print()

In [4]:
from inference_pipeline import InferencePipeline

# Define the checkpoint name BEFORE it is used, so this cell does not depend on
# `model_name` having leaked in from a previously executed cell.
model_name = "Salesforce/blip2-opt-2.7b"
processor = Blip2Processor.from_pretrained(model_name)

# COCO val2017 captions and images, read from the local ./data directory.
coco_dataset = COCODataset(
    ann_file="./data/coco/annotations/captions_val2017.json",
    img_dir="./data/coco/val2017",
)

# Run inference on a small sample (20 items) and persist the raw results so
# they can be scored separately.
inferencer = InferencePipeline(model, processor, device)
print("Starting inference...")
results = inferencer.run_inference(coco_dataset, task="image_captioning", max_samples=20)
inferencer.save_results(results, "./results/coco_quantized_inference.json")

In [None]:
from scoring_pipeline import ScoringPipeline

# Score the inference results that were previously written to disk.
scorer = ScoringPipeline()
loaded_results = scorer.load_results("./results/coco_quantized_inference.json")
scores = scorer.compute_scores(loaded_results, task="image_captioning")

# Report every metric except those whose name ends in "_per_caption".
for metric, score in scores.items():
    if metric.endswith("_per_caption"):
        continue
    print(f"{metric}: {score}")

In [None]:
import gc

# Release the model and everything in this notebook that was constructed from
# it. The cell is idempotent: names that were never created (e.g. the
# evaluation cell was skipped) or were already removed by an earlier run of
# this cell are ignored instead of raising NameError.
if "model" in globals():
    # Move the weights off the accelerator first, so device memory can be
    # returned even if some other object still references the model.
    model.to("cpu")

# `evaluator`, `inferencer` and `quantizer` were all built from `model`;
# presumably they keep a reference to it, so they are dropped as well.
for _name in ("evaluator", "inferencer", "quantizer", "model"):
    globals().pop(_name, None)
del _name

gc.collect()
torch.cuda.empty_cache()