# Prepare PaliGemma for deployment

Run the cells below and follow the instructions to deploy the model to the endpoint. Set the `PROCESSING_DIR` variable to a directory on your machine that is not git-tracked.

In [7]:
# Working directory that receives the generated code/, the downloaded snapshot and the final archive.
PROCESSING_DIR = "./TEMPS"

In [8]:
# Quote the interpolated path so a PROCESSING_DIR containing spaces still yields one directory.
!mkdir -p "{PROCESSING_DIR}/code"

In [None]:
%%writefile {PROCESSING_DIR}/code/requirements.txt
# Python dependencies shipped next to inference.py in code/.
accelerate
bitsandbytes
# 4.41.2 is a tagged release available on PyPI; the pinned wheel avoids needing git and a source build.
transformers==4.41.2
Pillow

In [None]:
%%writefile {PROCESSING_DIR}/code/inference.py
from transformers import AutoTokenizer, PaliGemmaForConditionalGeneration, PaliGemmaProcessor
import torch
import base64
from io import BytesIO
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_RGB_image_io(image_bytes):
    """Decode encoded image bytes into a PIL image in RGB mode.

    Args:
        image_bytes: Bytes of an encoded image file that ``Image.open`` can read.

    Returns:
        The opened image converted to "RGB" mode.
    """
    # Image.open reads from a file-like object, so wrap the bytes in an in-memory buffer.
    buffer = BytesIO(image_bytes)
    return Image.open(buffer).convert("RGB")

def model_fn(model_dir):
    """Load the PaliGemma model and its processor from ``model_dir``.

    Args:
        model_dir: Directory containing the pretrained model and processor files.

    Returns:
        A two-element list ``[model, processor]``. The model is loaded with
        ``torch.bfloat16`` weights and moved to the module-level ``device``.
    """
    processor = PaliGemmaProcessor.from_pretrained(model_dir)
    model = PaliGemmaForConditionalGeneration.from_pretrained(
        model_dir,
        torch_dtype=torch.bfloat16,
    )
    model = model.to(device)
    return [model, processor]


def predict_fn(data, model_process):
    """Run PaliGemma generation for one prompt/image request.

    Args:
        data: Request payload (dict-like). ``"image"`` holds the Base64-encoded
            image and is required; ``"prompt"`` is the text prompt and defaults
            to an empty string.
        model_process: The ``[model, processor]`` pair returned by ``model_fn``.

    Returns:
        ``{"response": text}`` where ``text`` is the first generated sequence
        decoded with special tokens skipped.

    Raises:
        ValueError: If the payload has no ``"image"`` entry or it is empty.
    """
    model, processor = model_process

    # get prompt & image
    prompt = data.get("prompt", "")
    image_b64 = data.get("image", "")

    # Fail fast with a clear message: an empty value decodes to zero bytes, which
    # would otherwise only surface later as an opaque image-open error.
    if not image_b64:
        raise ValueError('Request payload must contain a non-empty Base64-encoded "image" field.')

    # decode image from Base64
    image_data = base64.b64decode(image_b64)
    input_image = get_RGB_image_io(image_data)

    inputs = processor(text=prompt, images=input_image, padding="longest", do_convert_rgb=True, return_tensors="pt").to(device)
    # Match the input dtype to the model's weights (the model is loaded in bfloat16 by model_fn).
    inputs = inputs.to(dtype=model.dtype)

    with torch.no_grad():
        # NOTE(review): max_length appears to bound prompt + generated tokens together;
        # confirm whether max_new_tokens is the intended limit.
        output = model.generate(**inputs, max_length=496)
        str_out = processor.decode(output[0], skip_special_tokens=True)
    return {"response": str_out}

## Fetch the model

As an alternative to the following code, you can download the model yourself (for example, with the Hugging Face CLI).

In [None]:
from getpass import getpass
from shutil import copytree
from pathlib import Path
from huggingface_hub import snapshot_download
import random

HF_MODEL_ID = "google/paligemma-3b-mix-224"
# you need to accept the Gemma terms and conditions at: https://huggingface.co/google/paligemma-3b-mix-224
# getpass (instead of input) keeps the typed token from being echoed into the notebook output.
HF_TOKEN = getpass("Please fill in your HuggingFace token: ").strip()
assert len(HF_TOKEN) > 0, (
    "Please set HF_TOKEN to your huggingface token. "
    "You can find it here: https://huggingface.co/settings/tokens"
)

# download snapshot into PROCESSING_DIR/hf_download
snapshot_dir = snapshot_download(
    repo_id=HF_MODEL_ID,
    token=HF_TOKEN,  # `token` replaces the deprecated `use_auth_token` argument
    local_dir=f"{PROCESSING_DIR}/hf_download",
)

# create model dir; the random suffix gives each run its own staging folder
model_folder_name = f"model-{random.getrandbits(16)}"
model_tar = Path(PROCESSING_DIR, model_folder_name)
model_tar.mkdir(parents=True, exist_ok=True)

# copy snapshot to model dir
copytree(snapshot_dir, str(model_tar), dirs_exist_ok=True)

# copy code/ (inference.py + requirements.txt) to model dir
copytree(f"{PROCESSING_DIR}/code/", str(model_tar.joinpath("code")), dirs_exist_ok=True)

## Compress the model

In [None]:
# `z` gzip-compresses the archive so its contents match the .tar.gz name (plain `cvf` writes an uncompressed tar).
# `&&` keeps tar from running in the wrong directory if the cd fails; quotes protect paths with spaces.
!cd "{PROCESSING_DIR}" && tar czvf model.tar.gz -C "./{model_folder_name}" .