In [1]:
!pip install huggingface-hub Pillow



In [2]:
# Importing Libraries
import os
from peft import PeftModel, PeftConfig
from transformers import PaliGemmaProcessor, AutoModelForPreTraining
from huggingface_hub import login
import torch
from PIL import Image
import requests

# Login to Hugging Face Hub
token = os.getenv('HF_TOKEN')
login(token=token)

# Load PeftConfig and base model
config = PeftConfig.from_pretrained("DJPAUL25/paligemma_CS")
base_model = AutoModelForPreTraining.from_pretrained("google/paligemma-3b-pt-224")
model_finetuned = PeftModel.from_pretrained(base_model, "DJPAUL25/paligemma_CS")

# Loading PaliGemma Processor
processor = PaliGemmaProcessor.from_pretrained("google/paligemma-3b-pt-224")

def do_inference(processor, input_text, input_image, model):
    # Preprocess Inputs
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = processor(text=input_text, images=input_image, padding="longest", do_convert_rgb=True, return_tensors="pt").to(device)
    model.to(device)
    inputs = inputs.to(dtype=model.dtype)
    
    # Generating and Decoding Output
    with torch.no_grad():
        output = model.generate(**inputs, max_length=496)
        
    return processor.decode(output[0], skip_special_tokens=True)

  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/Ubuntu/.cache/huggingface/token
Login successful


`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.
Loading checkpoint shards: 100%|██████████| 3/3 [00:02<00:00,  1.17it/s]


### Fine-tuned Model

In [3]:
# Loading and Processing the Image
input_text = "What's in this image?"
img_url = "https://media.licdn.com/dms/image/C5612AQEwy6oxw1jNfA/article-cover_image-shrink_720_1280/0/1652105355771?e=2147483647&v=beta&t=Dp48_xxmmrKerkFHiRr9md32I7ERU3dj1-RUGr4vYdg"
input_image = Image.open(requests.get(img_url, stream=True).raw)

res = do_inference(processor, input_text, input_image, model_finetuned)
print(res)

What's in this image?
Bubble sort using Python
def bubbleSort(list):
for i in range(len(list)):
for j in range(len(list) - 1, 1, -1):
if list[j] < list[j - 1]:
list[j], list[j - 1] = list[j - 1], list[j]
return list
if
name == 'main__':
List = [8, 4, 2, 6, 5, 7, 1, 9]
print('Sorted List:', bubbleSort(list))
#lccoding.com
Sorted list: [1, 2, 4, 5, 6, 7, 8, 9]


### Pre-Trained Model

In [4]:
import os
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration, BitsAndBytesConfig, TrainingArguments, Trainer
import torch
from peft import get_peft_model, LoraConfig

model_id = "google/paligemma-3b-pt-224"

# Loading Quantised Model (QLoRA)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_type=torch.bfloat16
)
lora_config = LoraConfig(
    r=8,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj",
                    "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)

model = PaliGemmaForConditionalGeneration.from_pretrained(model_id,
                                                          quantization_config=bnb_config,
                                                          device_map={"": 0})
model = get_peft_model(model, lora_config)

Unused kwargs: ['bnb_4bit_compute_type']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.17s/it]


In [5]:
res = do_inference(processor, input_text, input_image, model)
print(res)



What's in this image?
text
