In [None]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [1]:
import kagglehub

# Download latest version
path = kagglehub.model_download("google/paligemma-2/transformers/paligemma2-3b-pt-224")

print("Path to model files:", path)

Path to model files: /kaggle/input/paligemma-2/transformers/paligemma2-3b-pt-224/1


In [None]:
# Install dependencies
!pip install -q transformers peft nltk rouge-score wandb

In [None]:
# Verify GPU
import torch
print(torch.__version__)
print(torch.cuda.is_available())

In [2]:
import wandb
wandb.login(key="d070aabfe54f4733fb727662604b037dee34842c")


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33madigew[0m ([33madigew-middle-east-technical-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory

data = pd.read_csv("/kaggle/input/riscmm/RISCM/captions.csv")
data.head()

Unnamed: 0,source,split,image,caption_1,caption_2,caption_3,caption_4,caption_5
0,NWPU,test,NWPU_31430.jpg,A gray plane on the runway and the lawn beside .,A grey plane is on the runway by the lawn .,There is an airplane on the runway with a larg...,A plane is parked on the runway next to the gr...,There is a plane on the runway beside the grass .
1,NWPU,test,NWPU_31431.jpg,Three small planes parked in a line on the air...,"There are four aircraft on the open ground, Th...",There are many planes of different sizes in a ...,Four planes are parked on the runway .,Four planes of different sizes were on the mar...
2,NWPU,test,NWPU_31432.jpg,A plane parked in a line on the airport with s...,A white plane was parked on the instruction li...,An airplane parked in an open area with many c...,A plane is parked on the open space .,There is 1 plane on the ground marked .
3,NWPU,test,NWPU_31433.jpg,A small plane and a big plane parked next to b...,A white plane and a gray plane parked at the b...,Two planes of different sizes are neatly parke...,A large plane and a small plane are parked nea...,Two planes are on the marked ground .
4,NWPU,test,NWPU_31434.jpg,Two planes parked next to boarding bridges .,Two aircraft were parked at the departure gates .,Two planes of different sizes are neatly parke...,Two planes are parked next to the terminal .,Two planes are on the marked ground .


Load Dataset with Small Partition

In [4]:
import pandas as pd
import os

def load_small_partition(image_dir, caption_file, sample_size=300):
    df = pd.read_csv(caption_file)

    # Filter out missing images
    valid_images = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]
    df = df[df['image'].isin(valid_images)]

    # Use existing splits
    train_df = df[df['split'] == 'train'].sample(frac=1, random_state=42).head(sample_size)
    val_df = df[df['split'] == 'test'].sample(frac=1, random_state=42).head(int(0.2 * sample_size))

    print(f"Loaded small partition: {len(train_df)} train, {len(val_df)} val")
    return train_df.reset_index(drop=True), val_df.reset_index(drop=True)

# Set paths
image_dir = "/kaggle/input/riscmm/RISCM/resized"
caption_file = "/kaggle/input/riscmm/RISCM/captions.csv"

# Load data
train_df, val_df = load_small_partition(image_dir, caption_file, sample_size=100)

Loaded small partition: 100 train, 20 val


Training Function

In [5]:
import torch
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
from peft import LoraConfig, get_peft_model
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torch.amp import autocast, GradScaler
import torch.optim as optim

class RISCDataset(Dataset):
    def __init__(self, image_dir, df):
        self.image_dir = image_dir
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image_path = os.path.join(self.image_dir, row.image)
        image = Image.open(image_path).convert('RGB')
        caption = row.caption_1
        return {"image": image, "caption": caption}

def custom_collate_fn(batch):
    images = [item["image"] for item in batch]
    captions = [item["caption"] for item in batch]
    return {"images": images, "captions": captions}

def train_lora(model_name, image_dir, train_df, val_df, caption_file, output_dir,
               lora_rank=32, epochs=2, learning_rate=5e-4,
               max_train_samples=None, max_val_samples=None,
               batch_size=1, accum_steps=8,
               target_modules=["q_proj", "v_proj"]):

    wandb.init(project="DI725_Phase2", name=f"LoRA-R{lora_rank}")
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = PaliGemmaForConditionalGeneration.from_pretrained(model_name, torch_dtype=torch.float16).to(device)
    processor = PaliGemmaProcessor.from_pretrained(model_name, use_fast=True)

    lora_config = LoraConfig(
        r=lora_rank,
        lora_alpha=32,
        target_modules=target_modules,
        lora_dropout=0.1
    )
    model = get_peft_model(model, lora_config)

    # Use provided train/val DataFrames
    if max_train_samples:
        train_df = train_df.head(max_train_samples)
    if max_val_samples:
        val_df = val_df.head(max_val_samples)

    train_dataset = RISCDataset(image_dir, train_df)
    val_dataset = RISCDataset(image_dir, val_df)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate_fn)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate_fn)
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
    scaler = GradScaler()

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        steps = 0
        optimizer.zero_grad()

        for batch_idx, batch in enumerate(train_loader):
            try:
                images = batch["images"]
                captions = [f"<image> caption {cap}" for cap in batch["captions"]]

                inputs = processor(text=captions, images=images, return_tensors="pt", padding="longest").to(device)

                with autocast("cuda"):
                    outputs = model(
                        input_ids=inputs["input_ids"],
                        attention_mask=inputs["attention_mask"],
                        pixel_values=inputs["pixel_values"],
                        labels=inputs["input_ids"]
                    )
                    loss = outputs.loss / accum_steps

                scaler.scale(loss).backward()

                if (batch_idx + 1) % accum_steps == 0 or (batch_idx + 1) == len(train_loader):
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()

                total_loss += loss.item() * accum_steps
                steps += 1

                if steps % 50 == 0:
                    print(f"Epoch {epoch+1}, Step {steps}, Loss: {loss.item() * accum_steps:.4f}")

            except Exception as e:
                print(f"Error in batch {batch_idx}: {e}")
                continue

        avg_train_loss = total_loss / steps if steps > 0 else 0
        wandb.log({"epoch": epoch+1, "train_loss": avg_train_loss})

        # Validation loop
        model.eval()
        val_loss = 0
        val_steps = 0
        for batch in val_loader:
            try:
                images = batch["images"]
                captions = [f"<image> caption {cap}" for cap in batch["captions"]]
                inputs = processor(text=captions, images=images, return_tensors="pt", padding="longest").to(device)

                with torch.no_grad(), autocast("cuda"):
                    outputs = model(**inputs, labels=inputs["input_ids"])
                    val_loss += outputs.loss.item()
                val_steps += 1

            except Exception as e:
                print(f"Validation error: {e}")
                continue

        avg_val_loss = val_loss / val_steps if val_steps > 0 else 0
        wandb.log({"epoch": epoch+1, "val_loss": avg_val_loss})
        print(f"Epoch {epoch+1}, Validation Loss: {avg_val_loss:.4f}")
        model.train()

    model.save_pretrained(output_dir)
    processor.save_pretrained(output_dir)
    wandb.finish()

2025-05-17 18:34:26.761920: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747506866.785510     260 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747506866.792901     260 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Evaluation Functions

In [6]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from transformers import PaliGemmaProcessor, PaliGemmaForConditionalGeneration
from peft import PeftModel 
from PIL import Image
import torch
from torch.amp import autocast
import pandas as pd
import os

# Set environment variable for better memory management
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Define smoother for BLEU
smoothie = SmoothingFunction().method1


def evaluate_zero_shot(model_name, image_dir, val_df, num_samples=10):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    
    # Load model and processor
    model = PaliGemmaForConditionalGeneration.from_pretrained(model_name).to(device).to(torch.float16)
    processor = PaliGemmaProcessor.from_pretrained(model_name)

    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    bleu_scores = []
    rouge_scores = []

    print("Running zero-shot evaluation...")
    val_df = val_df.reset_index(drop=True)

    for i in range(min(num_samples, len(val_df))):
        inputs, output_ids, image = None, None, None
        try:
            row = val_df.iloc[i]
            image_path = os.path.join(image_dir, row.image)
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image not found: {image_path}")
            
            image = Image.open(image_path).convert('RGB')
            prompt = "<image> caption"

            inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

            with torch.inference_mode(), autocast("cuda", dtype=torch.float16):
                output_ids = model.generate(
                    **inputs,
                    max_new_tokens=50,
                    num_beams=3,
                    repetition_penalty=1.2,
                    eos_token_id=processor.tokenizer.eos_token_id
                )
                caption = processor.decode(output_ids[0], skip_special_tokens=True).strip()

            reference = row.caption_1
            hypothesis = caption

            # Filter out garbage captions
            if len(hypothesis.split()) < 2 or "##" in hypothesis:
                print(f"Invalid caption generated at sample {i}. Skipping...")
                continue

            # Compute BLEU with smoothing
            bleu = sentence_bleu([reference.split()], hypothesis.split(), smoothing_function=smoothie)
            bleu_scores.append(bleu)

            # Compute ROUGE-L
            rs = scorer.score(reference, hypothesis)['rougeL'].fmeasure
            rouge_scores.append(rs)

            print(f"\nSample {i} - Reference: {reference}\nHypothesis: {caption}")

        except Exception as e:
            print(f"Error evaluating sample {i}: {e}")
        finally:
            # Clean up memory
            if 'inputs' in locals():
                del inputs
            if 'output_ids' in locals():
                del output_ids
            if 'image' in locals():
                del image
            torch.cuda.empty_cache()

    avg_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0
    avg_rouge = sum(rouge_scores) / len(rouge_scores) if rouge_scores else 0

    print(f"\nZero-shot Results: BLEU-4={avg_bleu:.4f}, ROUGE-L={avg_rouge:.4f}")
    return {
        "zero_shot_BLEU": avg_bleu,
        "zero_shot_ROUGE_L": avg_rouge
    }


def evaluate_lora_model(lora_model_path, base_model_name, image_dir, val_df, num_samples=10):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Load base model and apply LoRA weights
    base_model = PaliGemmaForConditionalGeneration.from_pretrained(base_model_name).to(device).to(torch.float16)
    model = PeftModel.from_pretrained(base_model, lora_model_path).to(device)
    processor = PaliGemmaProcessor.from_pretrained(base_model_name)

    scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)

    bleu_scores = []
    rouge_scores = []

    # Use varied prompts
    prompt_prefixes = [
        "<image> Describe this remote sensing image.",
        "<image> What is shown in this satellite image?",
        "<image> Generate a one-sentence description of this image."
    ]

    print("Running LoRA model evaluation...")
    val_df = val_df.reset_index(drop=True)

    for i in range(min(num_samples, len(val_df))):
        inputs, output_ids, image = None, None, None
        try:
            row = val_df.iloc[i]
            image_path = os.path.join(image_dir, row.image)
            if not os.path.exists(image_path):
                raise FileNotFoundError(f"Image not found: {image_path}")
            
            image = Image.open(image_path).convert('RGB')
            prompt = prompt_prefixes[i % len(prompt_prefixes)]

            inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)

            with torch.inference_mode(), autocast("cuda", dtype=torch.float16):
                output_ids = model.generate(
                    **inputs,
                    max_new_tokens=50,
                    num_beams=3,
                    repetition_penalty=1.2,
                    no_repeat_ngram_size=3,
                    eos_token_id=processor.tokenizer.eos_token_id,
                    pad_token_id=processor.tokenizer.pad_token_id
                )
                caption = processor.decode(output_ids[0], skip_special_tokens=True).strip()

            reference = row.caption_1
            hypothesis = caption

            # Skip if model just repeats the prompt
            if any(hypothesis.startswith(prefix.replace("<image>", "").strip()) for prefix in prompt_prefixes):
                print(f"Model repeated the prompt at sample {i}. Skipping...")
                continue

            # Skip garbage outputs
            if len(hypothesis.split()) < 3 or "##" in hypothesis or hypothesis.lower().count("caption") > 4:
                print(f"Invalid caption generated at sample {i}: '{hypothesis}'. Skipping...")
                continue

            # Compute metrics
            bleu = sentence_bleu([reference.split()], hypothesis.split(), smoothing_function=smoothie)
            bleu_scores.append(bleu)

            rs = scorer.score(reference, hypothesis)['rougeL'].fmeasure
            rouge_scores.append(rs)

            print(f"\nReference: {reference}\nHypothesis: {hypothesis}")

        except Exception as e:
            print(f"Error evaluating sample {i}: {e}")
        finally:
            if 'inputs' in locals():
                del inputs
            if 'output_ids' in locals():
                del output_ids
            if 'image' in locals():
                del image
            torch.cuda.empty_cache()

    avg_bleu = sum(bleu_scores) / len(bleu_scores) if bleu_scores else 0
    avg_rouge = sum(rouge_scores) / len(rouge_scores) if rouge_scores else 0

    print(f"\nLoRA Model Results: BLEU-4={avg_bleu:.4f}, ROUGE-L={avg_rouge:.4f}")
    return {
        "lora_BLEU": avg_bleu,
        "lora_ROUGE_L": avg_rouge
    }

Train Multiple LoRA Configurations

In [7]:
from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor
from peft import PeftModel 

import wandb
wandb.login()

# Set environment variable for better memory management
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# Define evaluation sample size
num_samples = 10  # ← Define here to avoid NameError

configs = [
    {"lora_rank": 32, "name": "LoRA-R32"},
    {"lora_rank": 8, "name": "LoRA-R8"},
    {"lora_rank": 4, "name": "LoRA-R4-k-o", "target_modules": ["k_proj", "o_proj"]}
]

results = []

# Optional: Evaluate Zero-shot Baseline First
wandb.init(project="DI725_Phase2", name="ZeroShot-Baseline")
zero_shot_results = evaluate_zero_shot(
    model_name="/kaggle/input/paligemma-2/transformers/paligemma2-3b-pt-224/1",
    image_dir=image_dir,
    val_df=val_df,
    num_samples=num_samples
)
wandb.finish()

# Train and evaluate each LoRA config
for config in configs:
    print(f"\n🚀 Training: {config['name']}")
    output_dir = f"./{config['name']}"

    # Initialize WandB for this experiment
    wandb.init(
        project="DI725_Phase2",
        name=config["name"],
        config={
            "lora_rank": config.get("lora_rank"),
            "target_modules": "-".join(config.get("target_modules", ["q_proj", "v_proj"])),
            "num_samples_eval": num_samples
        },
        reinit=True  # Allows multiple init calls in notebook
    )

    # Run training
    train_lora(
        model_name="/kaggle/input/paligemma-2/transformers/paligemma2-3b-pt-224/1",
        image_dir=image_dir,
        caption_file=caption_file,
        train_df=train_df,
        val_df=val_df,
        output_dir=output_dir,
        lora_rank=config.get("lora_rank", 32),
        target_modules=config.get("target_modules", ["q_proj", "v_proj"]),
        epochs=2,
        learning_rate=5e-4,
        batch_size=1,
        accum_steps=8
    )

    # Run evaluation
    result = evaluate_lora_model(
        output_dir,
        "/kaggle/input/paligemma-2/transformers/paligemma2-3b-pt-224/1",
        image_dir,
        val_df,
        num_samples=num_samples
    )
    
    result["config"] = config["name"]
    result["rank"] = config.get("lora_rank", 32)
    result["modules"] = "-".join(config.get("target_modules", ["q_proj", "v_proj"]))
    results.append(result)


    wandb.finish()  # Finish this run before starting a new one

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Running zero-shot evaluation...

Sample 0 - Reference: The storage tanks here are half white and half black .
Hypothesis: caption
image result for image of container port in china

Sample 1 - Reference: The stratus clouds are located above the surface of the sea .
Hypothesis: caption
invention can be seen through the window of invention .

Sample 2 - Reference: The long strip island with dense vegetation is surrounded by light blue waters .
Hypothesis: caption
satellite image of the island

Sample 3 - Reference: The harbor has lots of neatly docked boats and some buildings are next to the harbor .
Hypothesis: caption
property image # directly on the lake with private beach and pontoon

Sample 4 - Reference: parking lot next to planted a few trees .
Hypothesis: caption
police , ambulance and fire department vehicles line a street as seen in this aerial photo

Sample 5 - Reference: A road go across another one diagonally .
Hypothesis: caption
pedestrian bridge over the interstate

Sample


🚀 Training: LoRA-R32


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

It is strongly recommended to train Gemma2 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.


Epoch 1, Step 50, Loss: 13.5510
Epoch 1, Step 100, Loss: 12.5343
Epoch 1, Validation Loss: 12.4339
Epoch 2, Step 50, Loss: 12.1314
Epoch 2, Step 100, Loss: 12.0420
Epoch 2, Validation Loss: 12.0093
Epoch 3, Step 50, Loss: 12.2082
Epoch 3, Step 100, Loss: 12.1308
Epoch 3, Validation Loss: 11.9279
Epoch 4, Step 50, Loss: 11.6185
Epoch 4, Step 100, Loss: 11.8060
Epoch 4, Validation Loss: 11.8727
Epoch 5, Step 50, Loss: 12.0077
Epoch 5, Step 100, Loss: 11.8092
Epoch 5, Validation Loss: 11.8356


0,1
epoch,▁▁▃▃▅▅▆▆██
train_loss,█▂▁▁▁
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_loss,11.84695
val_loss,11.83564


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Running LoRA model evaluation...
Invalid caption generated at sample 0: 'Describe this remote sensing image.
caption caption caption caption Caption Caption CaptionCaptionCaptionCaption Caption Captioncaption Caption Caption caption caption captions captions captions caption captioncaption caption Caption caption CaptionCaption CaptionCaption caption caption captioned captioned caption captioned caption captionCAPTIONCAPTIONCAPTIONCAPCAPTIONCAPTIONcaptioncaptioncaption captioncaptioncaption'. Skipping...
Invalid caption generated at sample 1: 'What is shown in this satellite image?
caption caption caption caption Caption Caption CaptionCaption CaptionCaptionCaptionCaption Caption Caption caption caption captions captions captions caption caption captioned caption captioned captioned caption captionCAPTIONCAPTIONCAPTIONCAPCAPTIONCAPTIONcaptioncaptioncaption captioncaption caption captions caption captions Caption caption Caption captionCaption caption captioncaption'. Skipping...

Refer

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1, Step 50, Loss: 13.9717
Epoch 1, Step 100, Loss: 12.4640
Epoch 1, Validation Loss: 12.4688
Epoch 2, Step 50, Loss: 11.7667
Epoch 2, Step 100, Loss: 12.0393
Epoch 2, Validation Loss: 12.0135
Epoch 3, Step 50, Loss: 12.0308
Epoch 3, Step 100, Loss: 11.7334
Epoch 3, Validation Loss: 11.9261
Epoch 4, Step 50, Loss: 11.7501
Epoch 4, Step 100, Loss: 12.0760
Epoch 4, Validation Loss: 11.8787
Epoch 5, Step 50, Loss: 11.6648
Epoch 5, Step 100, Loss: 11.9506
Epoch 5, Validation Loss: 11.8439


0,1
epoch,▁▁▃▃▅▅▆▆██
train_loss,█▂▁▁▁
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_loss,11.85417
val_loss,11.84387


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Running LoRA model evaluation...

Reference: The storage tanks here are half white and half black .
Hypothesis: Describe this remote sensing image.
☐ ☐ ☐ ☐☐☐☐■■■■■■■■■■■■■■ ■ ■ ■ • • ••••.•.•.•••°•°•°••°•.•°•.•.•°•°•.••°••.•°•••

Reference: The stratus clouds are located above the surface of the sea .
Hypothesis: What is shown in this satellite image?
                     packagepackagepackage package package package packages packages packagespackagespackagespackagespagespagespagespagepagepage page page page
page page

Reference: The long strip island with dense vegetation is surrounded by light blue waters .
Hypothesis: Generate a one-sentence description of this image.
A A A A a a a aa aa aa aaa aaa aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa

Reference: The harbor has lots of neatly docked boats and some buildings are next to the harbor .
Hypothe

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 1, Step 50, Loss: 14.6810
Epoch 1, Step 100, Loss: 12.6083
Epoch 1, Validation Loss: 12.5653
Epoch 2, Step 50, Loss: 11.8130
Epoch 2, Step 100, Loss: 11.9576
Epoch 2, Validation Loss: 12.0209
Epoch 3, Step 50, Loss: 11.8041
Epoch 3, Step 100, Loss: 11.8975
Epoch 3, Validation Loss: 11.9221
Epoch 4, Step 50, Loss: 11.9578
Epoch 4, Step 100, Loss: 11.8962
Epoch 4, Validation Loss: 11.8777
Epoch 5, Step 50, Loss: 11.5022
Epoch 5, Step 100, Loss: 12.0072
Epoch 5, Validation Loss: 11.8561


0,1
epoch,▁▁▃▃▅▅▆▆██
train_loss,█▂▁▁▁
val_loss,█▃▂▁▁

0,1
epoch,5.0
train_loss,11.85816
val_loss,11.85615


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Running LoRA model evaluation...

Reference: The storage tanks here are half white and half black .
Hypothesis: Describe this remote sensing image.
package package package package
 ویکی‌پدیا ویکی‌پدیا ویکی‌پدیا wikipedia wikipedia wikipedia Wikipedia Wikipedia Wikipedia wikipedia wikipediawikipedia wikipedia wikipedia wikiwiki wikiwiki wikipedia wiki wiki wikiwikiwiki wiki wiki wiwiwiwi wiwiwiki wikiwiwi wiki wikiwi wikiwiki wiwi

Reference: The stratus clouds are located above the surface of the sea .
Hypothesis: What is shown in this satellite image?
None None None None
None none none none
none none none one one one
one one one ones ones ones
ones ones ones ons ons ons
ons ons ons on on on
ons on on ons ons ona ona ona
ona ona ona

Reference: The long strip island with dense vegetation is surrounded by light blue waters .
Hypothesis: Generate a one-sentence description of this image.
package package package package packaging packaging packaging package packaging package package packa

 Generate Table

In [None]:
import pandas as pd

# Add zero-shot baseline to results list
results.insert(0, {
    "config": "Zero-shot Baseline",
    "rank": "N/A",
    "modules": "N/A",
    "lora_BLEU": zero_shot_results["zero_shot_BLEU"],
    "lora_ROUGE_L": zero_shot_results["zero_shot_ROUGE_L"]
})

# Create DataFrame
results_table = pd.DataFrame(results)[["config", "rank", "modules", "lora_BLEU", "lora_ROUGE_L"]]
results_table.columns = ["Model", "Rank", "Modules", "BLEU-4", "ROUGE-L"]

print("\n📊 Model Performance Comparison:")
print(results_table.to_markdown(index=False))

# Save to CSV
results_table.to_csv("results_table.csv", index=False)

