# Transformers (Hugging Face) Model - Downcasting

In [None]:
import torch
from transformers import BlipForConditionalGeneration, BlipProcessor
from src.model.utils import load_image, get_generation
from IPython.display import display

## Load Model

In [None]:
# Hugging Face Hub checkpoint id; reused for both model loads and the processor.
model_name = "Salesforce/blip-image-captioning-base"

In [None]:
# Baseline load with no dtype override; later cells treat this copy as the
# fp32 model (presumably the checkpoint's default precision -- confirm).
model = BlipForConditionalGeneration.from_pretrained(model_name)

## Memory Footprint

In [None]:
# Size of the baseline model as reported by get_memory_footprint();
# the print cell that follows labels this value as bytes.
fp32_mem_footprint = model.get_memory_footprint()

In [None]:
# Report the baseline footprint twice: the raw value, then scaled by 1e6
# (decimal megabytes).
print("Footprint of the fp32 model in bytes: ", fp32_mem_footprint)
print("Footprint of the fp32 model in MBs: ", fp32_mem_footprint / 1e6)

## Downcasting

In [None]:
# Second copy of the same checkpoint, this time requesting bfloat16 via
# the torch_dtype argument of from_pretrained.
model_bf16 = BlipForConditionalGeneration.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
)

In [None]:
# Same measurement as for the baseline, taken on the bfloat16 copy.
bf16_mem_footprint = model_bf16.get_memory_footprint()

## Memory Footprint Reduction

In [None]:
# Report the footprint of the bfloat16 copy, in the same two units as the
# fp32 report earlier in the notebook (raw value, then divided by 1e6).
# The labels previously said "fp32" although the value printed here is
# bf16_mem_footprint, which made the two reports indistinguishable.
print("Footprint of the bf16 model in bytes: ",
      bf16_mem_footprint)
print("Footprint of the bf16 model in MBs: ",
      bf16_mem_footprint/1e+6)

In [None]:
# Relative saving of the bf16 copy versus the fp32 baseline, expressed as
# a percentage of the fp32 footprint.
print(
    "Memory footprint reduction: ",
    (fp32_mem_footprint - bf16_mem_footprint) / fp32_mem_footprint * 100,
    "%",
)

## Model Performance

In [None]:
# Demo picture used as the input for both models.
img_url = (
    'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
)

# load_image is a project helper (src.model.utils); the object it returns
# is resized to 500x350 only for the inline preview -- `image` itself is
# left as loaded.
image = load_image(img_url)
display(
    image.resize((500, 350))
)

In [None]:
# Processor loaded from the same checkpoint id as the models; it is passed
# to get_generation together with each model below.
processor = BlipProcessor.from_pretrained(model_name)

In [None]:
# Run the project's generation helper on the baseline model; torch.float32
# is passed as the helper's final argument to match this model's precision.
results_fp32 = get_generation(model, processor, image, torch.float32)

In [None]:
# Show the baseline model's output for comparison with the bf16 run.
print("fp32 Model Results:\n", results_fp32)

In [None]:
# Same helper call as for the baseline, but with the bfloat16 copy of the
# model and torch.bfloat16 as the final argument.
results_bf16 = get_generation(model_bf16, processor, image, torch.bfloat16)

In [None]:
# Show the downcast model's output so it can be compared with the fp32 result.
print("bf16 Model Results:\n", results_bf16)