In [None]:
import gc
import time
from transformers import AutoProcessor, Gemma3ForConditionalGeneration
from PIL import Image
import requests
import torch


def clear_memory(pause_seconds: float = 2) -> None:
    """Drop the notebook's large objects and release cached GPU memory.

    Removes the module-level names ``inputs``, ``model``, ``processor``,
    ``trainer``, ``peft_model`` and ``bnb_config`` if they exist, runs the
    garbage collector and, when CUDA is available, empties the CUDA caching
    allocator and prints the allocated / reserved totals in GB.

    Args:
        pause_seconds: Seconds to sleep between the individual clean-up
            steps. Defaults to 2.
    """
    # pop() with a default removes the name when present and is a no-op
    # otherwise, replacing one "if ... in globals(): del ..." per name.
    for name in ("inputs", "model", "processor", "trainer", "peft_model", "bnb_config"):
        globals().pop(name, None)
    time.sleep(pause_seconds)

    # Garbage collection first, so unreachable tensors are actually freed
    # before the CUDA cache is emptied.
    gc.collect()
    time.sleep(pause_seconds)

    if not torch.cuda.is_available():
        # torch.cuda.synchronize() fails on machines without CUDA, and there
        # is no GPU memory to release or report in that case.
        return

    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    time.sleep(pause_seconds)
    gc.collect()
    time.sleep(pause_seconds)

    print(f"GPU allocated memory: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
    print(f"GPU reserved memory: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")


In [None]:
from PIL import Image
import pandas as pd
import os
from tqdm import tqdm

# Dataset split folder(s) to evaluate.
folder_paths_raw = ['Test']
# Dataset root(s) that contain the split folders (each ends with '/').
prefix = ['Data/real_clocks/']
# Alternative set of roots:
# prefix = ['Data/real_clocks/','Data/standard/','Data/distorted/','Data/thin_hands/']
# Every root joined with every split, iterating roots in the outer position.
folder_paths = [
    dataset_root + split_name
    for dataset_root in prefix
    for split_name in folder_paths_raw
]

def get_png_files(path):
    """Return the names of the entries in *path* that end with ``.png``.

    The names are returned sorted, because ``os.listdir`` yields entries in
    arbitrary order and callers write one result row per file; sorting keeps
    that row order reproducible. The match is case-sensitive.

    Args:
        path: Directory to list.

    Returns:
        A sorted list of file names (not full paths).
    """
    return sorted(f for f in os.listdir(path) if f.endswith('.png'))

# Base checkpoint; its short name is used in the result column and file names.
model_id = "google/gemma-3-12b-it"
model_raw_name = model_id.split('/')[-1]

# Question asked about every clock image.
prompt = '''What time is shown on the clock in the given image?'''


def _read_clock(model, processor, image_path, question):
    """Return the model's decoded answer to *question* for one image file."""
    # The context manager closes the file handle; convert() returns a new,
    # fully loaded image, so it remains usable after the file is closed.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")
    messages = [
        {"role": "user", "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": question}
        ]}
    ]
    inputs = processor.apply_chat_template(
        messages, add_generation_prompt=True, tokenize=True,
        return_dict=True, return_tensors="pt"
    ).to(model.device, dtype=torch.bfloat16)

    # generate() returns prompt + continuation; remember where the prompt
    # ends so only the newly generated tokens are decoded.
    input_len = inputs["input_ids"].shape[-1]

    with torch.inference_mode():
        generation = model.generate(
            **inputs, max_new_tokens=500, do_sample=False, top_p=None, top_k=None
        )
    return processor.decode(generation[0][input_len:], skip_special_tokens=True)


# Pair every dataset root with every split explicitly. Zipping the flattened
# folder list against the roots only lines up when there is a single split.
eval_targets = [(root, split) for root in prefix for split in folder_paths_raw]

for prefix_tmp, folder_raw in eval_targets:
    folder_path = prefix_tmp + folder_raw
    # Other fine-tuning variants: ['_Train', '_Train_1000', '_Train_5000']
    for finetune_data in ['_Train']:
        output_path = f'{prefix_tmp}{folder_raw}_{model_raw_name}{finetune_data}_Results.xlsx'
        print(output_path)
        # Check for existing results BEFORE loading anything, so a finished
        # configuration does not pay for loading the model and never leaves
        # it resident without a clear_memory() call.
        if os.path.exists(output_path):
            continue

        model = Gemma3ForConditionalGeneration.from_pretrained(
            model_id, device_map="cuda", torch_dtype=torch.bfloat16
        )
        # Loaded per configuration because clear_memory() removes the global
        # `processor` at the end of each one.
        processor = AutoProcessor.from_pretrained(model_id, use_fast=True)

        # The adapter directory is named after the dataset root's second
        # path component (e.g. 'real_clocks' for 'Data/real_clocks/').
        adapter_path = f"Finetuned_models/{prefix_tmp.split('/')[1]}_Gemma/Gemma3{finetune_data}"
        model.load_adapter(adapter_path)

        answer_column = f'answer-{model_raw_name}'
        rows = []
        png_files = get_png_files(folder_path)
        for file_name in tqdm(png_files, desc=f"Processing {folder_path} PNG files"):
            decoded = _read_clock(
                model, processor, os.path.join(folder_path, file_name), prompt
            )
            rows.append({
                # The expected answer is the file stem with '_' read as ':'
                # (e.g. '10_30.png' -> '10:30').
                'answer': os.path.splitext(file_name)[0].replace('_', ':'),
                answer_column: decoded,
            })

        # Build the frame once from the collected rows; `columns` keeps the
        # header even when the folder contains no PNG files.
        df = pd.DataFrame(rows, columns=['answer', answer_column])
        df.to_excel(output_path, index=False, engine='openpyxl')
        clear_memory()