# Experiment 1: Dense Baseline - Moondream RL Fine-Tuning

This notebook fine-tunes Moondream 2B on FloodNet VQA using RL (reinforcement learning).

**What you need:**
1. Moondream API key from https://moondream.ai/console
2. FloodNet VQA dataset from https://github.com/BinaLab/FloodNet-VQA

**No GPU required** - training happens on Moondream's servers.

## Step 1: Install Dependencies

In [None]:
!pip install -q requests pillow tqdm

## Step 2: Upload Your Data

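Run this cell and upload your `floodnet.zip` file when prompted. The archive is extracted into `/content/`, and the later cells expect it to contain a top-level `floodnet/` folder with `data/` (annotation JSON files) and `Images/` inside.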

In [None]:
from google.colab import files
import os

# Upload floodnet.zip
print("Please upload your floodnet.zip file:")
uploaded = files.upload()

# Extract
!unzip -q floodnet.zip -d /content/
print("\nExtracted! Checking contents...")
!ls /content/floodnet/

## Step 3: Set Your API Key

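Get your key from https://moondream.ai/console, then store it as a Colab secret named `MOONDREAM_API_KEY`. The cell below reads the key from Colab's secrets, so it never has to be typed into the notebook.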

In [None]:
from google.colab import userdata

# Read the API key from Colab's secrets store, where it must be saved under the
# name 'MOONDREAM_API_KEY'. This keeps the key itself out of the notebook.
MOONDREAM_API_KEY = userdata.get('MOONDREAM_API_KEY')

## Step 4: Import Libraries & Setup

In [None]:
import json
import base64
import requests
from pathlib import Path
from tqdm.auto import tqdm
import time
import re
from collections import defaultdict
import numpy as np
import random

# Dataset layout under the extracted archive: annotation JSON files live in
# data/, images in Images/.
ROOT = Path("/content/floodnet")
ANN_ROOT = ROOT / "data"
IMG_ROOT = ROOT / "Images"

# Seed Python's RNG so the per-epoch random.sample() draws made during training
# are repeatable across "Restart & Run All".
SEED = 42
random.seed(SEED)

print("Libraries imported!")
print(f"Data path: {ROOT}")

## Step 5: Helper Functions

In [None]:
def load_json(path):
    """Parse the JSON document stored at `path` and return the resulting object."""
    with open(path, "r") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def encode_image_to_base64(image_path):
    """Return the file at `image_path` as a base64 `data:` URL string.

    The MIME type is chosen from the lower-cased path: names ending in
    `.png` or `.webp` get the matching image type; anything else is
    labelled `image/jpeg`.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    img_data = base64.b64encode(raw_bytes).decode("utf-8")

    # Default to JPEG and override only for the two suffixes handled explicitly.
    lowered_path = str(image_path).lower()
    mime = "image/jpeg"
    for suffix, candidate in ((".png", "image/png"), (".webp", "image/webp")):
        if lowered_path.endswith(suffix):
            mime = candidate
            break

    return f"data:{mime};base64,{img_data}"

def norm(x):
    """Normalize text for comparison.

    Converts `x` to a string, lower-cases it, removes every character that is
    neither a word character nor whitespace, collapses whitespace runs to a
    single space and trims the ends.

    Returns:
        The normalized string (may be empty).
    """
    text = str(x).lower()
    text = re.sub(r"[^\w\s]", "", text)
    # Trim LAST: deleting punctuation can expose new leading/trailing spaces
    # ("yes ." -> "yes "), which would otherwise break exact-match comparison.
    text = re.sub(r"\s+", " ", text).strip()
    return text

print("Helper functions defined!")

## Step 6: Reward Function

In [None]:
def _parse_number(raw, normalized):
    """Return the answer as a finite float, or None if it is not numeric.

    The raw text is tried first because norm() deletes "." and "-", which
    would turn "4.0" into 40 and "-3" into 3. The normalized text is the
    fallback so inputs such as "1,000" or "$5" still parse as before.
    """
    import math  # local import keeps this cell self-contained

    for candidate in (str(raw).strip(), normalized):
        try:
            value = float(candidate)
        except ValueError:
            continue
        # "nan"/"inf" parse as floats but are not usable numeric answers.
        if math.isfinite(value):
            return value
    return None


def compute_reward(predicted, ground_truth):
    """
    Compute reward for RL training.

    - Numeric answers (both sides parse as numbers): 1.0 when equal, otherwise
      1 - relative error, floored at 0.0
    - Exact match after normalization = 1.0
    - Text answers: token overlap (Jaccard) scaled to a max of 0.8

    Args:
        predicted: Model answer (converted with str()).
        ground_truth: Reference answer (converted with str()).

    Returns:
        A float in [0.0, 1.0].
    """
    pred_norm = norm(predicted)
    gt_norm = norm(ground_truth)

    # Score numerically before the exact-match check: after normalization
    # "4.0" and "40" look identical even though the numbers differ.
    pred_num = _parse_number(predicted, pred_norm)
    gt_num = _parse_number(ground_truth, gt_norm)
    if pred_num is not None and gt_num is not None:
        if gt_num == 0:
            return 0.0 if pred_num != 0 else 1.0
        # max(..., 1) keeps small ground truths from inflating the error.
        relative_error = abs(pred_num - gt_num) / max(abs(gt_num), 1)
        return max(0.0, 1.0 - relative_error)

    if pred_norm == gt_norm:
        return 1.0

    pred_tokens = set(pred_norm.split())
    gt_tokens = set(gt_norm.split())

    if len(pred_tokens) == 0 or len(gt_tokens) == 0:
        return 0.0

    intersection = pred_tokens.intersection(gt_tokens)
    union = pred_tokens.union(gt_tokens)
    overlap = len(intersection) / len(union)

    return overlap * 0.8  # Cap at 0.8, reserve 1.0 for exact match

print("Testing reward function:")
print(f"  'flooded' vs 'flooded' = {compute_reward('flooded', 'flooded')}")
print(f"  'flooded' vs 'Flooded' = {compute_reward('flooded', 'Flooded')}")
print(f"  '5' vs '5' = {compute_reward('5', '5')}")
print(f"  '4' vs '5' = {compute_reward('4', '5')}")
print(f"  'red car' vs 'blue car' = {compute_reward('red car', 'blue car'):.2f}")

# Turn the printed spot checks into hard checks so "Run All" stops here if the
# reward function regresses, before any training requests are sent.
assert compute_reward('flooded', 'flooded') == 1.0
assert compute_reward('flooded', 'Flooded') == 1.0   # case-insensitive match
assert compute_reward('5', '5') == 1.0
assert abs(compute_reward('4', '5') - 0.8) < 1e-9    # 1 - |4 - 5| / 5
assert abs(compute_reward('red car', 'blue car') - 0.8 / 3) < 1e-9  # 1 of 3 tokens shared

## Step 7: Moondream API Wrapper

In [None]:
class MoondreamTrainer:
    """Wrapper for Moondream RL Fine-tuning API.

    Holds the auth headers, the id of the adapter being trained and a local
    count of the training steps applied through this object.
    """

    def __init__(self, api_key, timeout=120):
        """Store credentials and request settings; no network call is made.

        Args:
            api_key: Value sent in the "X-Moondream-Auth" header.
            timeout: Seconds passed to `requests` as the per-request timeout,
                so a stalled connection raises instead of hanging the
                notebook. `None` disables the timeout.
        """
        self.api_key = api_key
        self.headers = {
            "X-Moondream-Auth": api_key,
            "Content-Type": "application/json"
        }
        self.base_url = "https://api.moondream.ai/v1/tuning"
        self.timeout = timeout
        self.adapter_id = None   # set by create_adapter()
        self.current_step = 0    # incremented locally by train_step()

    def _request(self, method, endpoint, json_data=None):
        """Make API request with error handling.

        Prints the status and body of a non-2xx response, then raises
        `requests.HTTPError`. Returns the decoded JSON body, or `{}` when the
        response body is empty.
        """
        url = f"{self.base_url}{endpoint}"
        response = requests.request(
            method, url, headers=self.headers, json=json_data, timeout=self.timeout
        )

        if not response.ok:
            print(f"API Error: {response.status_code} - {response.text}")
            response.raise_for_status()

        return response.json() if response.text else {}

    def create_adapter(self, name="floodnet-baseline", rank=32):
        """Create LoRA adapter. Rank: 8, 16, 24, or 32.

        Stores the returned adapter id on the instance and returns it.
        """
        response = self._request("POST", "/adapters", {"name": name, "rank": rank})
        self.adapter_id = response["adapter_id"]
        print(f"✓ Created adapter: {self.adapter_id}")
        print(f"  Name: {name}, Rank: {rank}")
        return self.adapter_id

    def generate_rollouts(self, image_b64, question, num_rollouts=4, temperature=1.0):
        """Generate multiple answer attempts for training.

        Args:
            image_b64: Image as a base64 data URL (sent as "image_url").
            question: Question text for the "query" skill.
            num_rollouts: Number of answers to request.
            temperature: Sampling temperature for the request.

        Returns:
            The decoded API response.
        """
        return self._request("POST", "/rollouts", {
            "adapter_id": self.adapter_id,
            "num_rollouts": num_rollouts,
            "request": {
                "skill": "query",
                "question": question,
                "image_url": image_b64,
                "settings": {"temperature": temperature, "max_tokens": 128}
            }
        })

    def train_step(self, groups, lr=0.002):
        """Apply training step with scored rollouts.

        Increments `current_step` only after the request succeeds and returns
        the decoded API response.
        """
        response = self._request("POST", "/train_step", {
            "adapter_id": self.adapter_id,
            "groups": groups,
            "lr": lr
        })
        self.current_step += 1
        return response

    def evaluate(self, image_b64, question):
        """Get deterministic answer (temperature=0).

        Requests a single rollout and returns its answer text.
        """
        response = self.generate_rollouts(image_b64, question, num_rollouts=1, temperature=0.0)
        return response["rollouts"][0]["output"]["answer"]

    def release_checkpoint(self, step=None):
        """Promote checkpoint to production release.

        Args:
            step: Checkpoint step to release; defaults to `current_step`.
        """
        # `is None` rather than `or`: an explicit step of 0 must not be
        # silently replaced by the current step.
        if step is None:
            step = self.current_step
        self._request("POST", f"/adapters/{self.adapter_id}/checkpoints/{step}/release")
        print(f"✓ Released checkpoint at step {step}")

    def pin_checkpoint(self, step):
        """Pin checkpoint to prevent expiration (sends expires_at = null)."""
        self._request("PATCH", f"/adapters/{self.adapter_id}/checkpoints/{step}",
                     {"expires_at": None})
        print(f"  Pinned checkpoint at step {step}")

print("MoondreamTrainer class defined!")

## Step 8: Load FloodNet Data

In [None]:
def prepare_samples(annotations, img_dir, max_samples=None):
    """Prepare samples from FloodNet annotations.

    Args:
        annotations: Sequence of dicts with "Image_ID", "Question" and
            "Ground_Truth" keys, plus an optional "Question_Type".
        img_dir: Directory containing the image files named by "Image_ID".
        max_samples: If given, only the first `max_samples` annotations are
            considered. The cut is applied before the file-existence check,
            so fewer samples may be returned.

    Returns:
        A list of dicts with keys "img_path", "question", "answer" (always a
        string), "type" and "image_id". Annotations whose image file does not
        exist are skipped.
    """
    img_dir = Path(img_dir)

    # `is None` rather than truthiness, so max_samples=0 yields no samples
    # instead of silently meaning "no limit".
    selected = annotations if max_samples is None else annotations[:max_samples]

    samples = []
    for ann in selected:
        img_path = img_dir / ann["Image_ID"]
        if not img_path.exists():
            continue
        samples.append({
            "img_path": img_path,
            "question": ann["Question"],
            "answer": str(ann["Ground_Truth"]),
            "type": ann.get("Question_Type", "unknown"),
            "image_id": ann["Image_ID"]
        })
    return samples

print("Loading annotations...")
train_ann = load_json(ANN_ROOT / "train_annotations.json")
val_ann = load_json(ANN_ROOT / "valid_annotations.json")
test_ann = load_json(ANN_ROOT / "test_annotations.json")

print(f"Annotations: Train={len(train_ann)} | Val={len(val_ann)} | Test={len(test_ann)}")

# prepare_samples() keeps only annotations whose image file exists on disk, so
# the counts printed below can be lower than the annotation counts above.
print("\nPreparing samples...")
train_samples = prepare_samples(train_ann, IMG_ROOT / "train_images")
val_samples = prepare_samples(val_ann, IMG_ROOT / "valid_images")
test_samples = prepare_samples(test_ann, IMG_ROOT / "test_images")

print(f"Valid samples: Train={len(train_samples)} | Val={len(val_samples)} | Test={len(test_samples)}")

# An empty training split means no annotated image was found on disk. Stop
# with a clear message instead of an IndexError on train_samples[0] below.
if not train_samples:
    raise FileNotFoundError(
        f"No training images found under {IMG_ROOT / 'train_images'}; "
        "check the folder layout of the extracted archive."
    )

print("\nExample sample:")
print(json.dumps({k: str(v) for k, v in train_samples[0].items()}, indent=2))

## Step 9: Training Function

In [None]:
def train_rl(trainer, train_samples, epochs=3, batch_size=8,
             samples_per_epoch=500, num_rollouts=4, lr=0.002):
    """
    RL fine-tuning loop.

    Each epoch draws a random subset of `train_samples` and, for each batch:
    1. Generate multiple rollouts (answer attempts) per sample
    2. Score each rollout with our reward function
    3. Send the scored rollouts back to train the model

    Args:
        trainer: Object providing `generate_rollouts`, `train_step` and
            `current_step` (a MoondreamTrainer).
        train_samples: List of sample dicts with "img_path", "question",
            "answer" and "image_id" keys.
        epochs: Number of sampling/training rounds.
        batch_size: Samples grouped into one training step (must be >= 1).
        samples_per_epoch: Samples drawn per epoch, capped at
            len(train_samples).
        num_rollouts: Answer attempts requested per sample.
        lr: Learning rate passed to each training step.

    Returns:
        A list with one dict per epoch: {"epoch", "avg_reward", "steps"}.

    Raises:
        ValueError: If `batch_size` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    print(f"\n{'='*60}")
    print("STARTING RL FINE-TUNING")
    print(f"{'='*60}")
    print(f"Epochs: {epochs}")
    print(f"Batch size: {batch_size}")
    print(f"Samples per epoch: {samples_per_epoch}")
    print(f"Rollouts per sample: {num_rollouts}")
    print(f"Learning rate: {lr}")

    history = []

    for epoch in range(1, epochs + 1):
        print(f"\n{'='*60}")
        print(f"EPOCH {epoch}/{epochs}")
        print(f"{'='*60}")

        epoch_samples = random.sample(
            train_samples,
            min(samples_per_epoch, len(train_samples))
        )

        epoch_rewards = []
        # Ceiling division: the final, smaller batch is trained on too.
        # Floor division dropped the remainder and ran zero steps whenever
        # fewer than batch_size samples were available.
        num_batches = (len(epoch_samples) + batch_size - 1) // batch_size

        pbar = tqdm(range(num_batches), desc=f"Epoch {epoch}")

        for batch_idx in pbar:
            batch_start = batch_idx * batch_size
            batch_end = batch_start + batch_size
            batch = epoch_samples[batch_start:batch_end]

            groups = []
            batch_rewards = []

            for sample in batch:
                # Best effort per sample: a failed request skips that sample
                # and the rest of the batch is still used.
                try:
                    image_b64 = encode_image_to_base64(sample["img_path"])

                    rollout_resp = trainer.generate_rollouts(
                        image_b64,
                        sample["question"],
                        num_rollouts=num_rollouts,
                        temperature=1.0
                    )

                    rewards = [
                        compute_reward(r["output"]["answer"], sample["answer"])
                        for r in rollout_resp["rollouts"]
                    ]

                    batch_rewards.extend(rewards)

                    groups.append({
                        "request": rollout_resp["request"],
                        "rollouts": rollout_resp["rollouts"],
                        "rewards": rewards
                    })

                    # Short pause between requests.
                    time.sleep(0.05)

                except Exception as e:
                    print(f"\nError on {sample['image_id']}: {e}")
                    continue

            if groups:
                try:
                    trainer.train_step(groups, lr=lr)
                    # Rewards count toward the epoch average only when the
                    # training step that used them succeeded.
                    epoch_rewards.extend(batch_rewards)

                    avg_r = np.mean(batch_rewards)
                    pbar.set_postfix({"reward": f"{avg_r:.3f}", "step": trainer.current_step})

                except Exception as e:
                    print(f"\nTraining step failed: {e}")

        avg_reward = np.mean(epoch_rewards) if epoch_rewards else 0.0
        print(f"\nEpoch {epoch} complete!")
        print(f"  Average reward: {avg_reward:.4f}")
        print(f"  Total steps: {trainer.current_step}")

        history.append({
            "epoch": epoch,
            "avg_reward": float(avg_reward),
            "steps": trainer.current_step
        })

    print(f"\n{'='*60}")
    print("TRAINING COMPLETE!")
    print(f"{'='*60}")

    return history

print("Training function defined!")

## Step 10: Evaluation Function

In [None]:
def evaluate(trainer, samples, name="Test", max_samples=None):
    """
    Evaluate model on a dataset.

    A prediction counts as correct when it equals the ground truth after
    norm(). Samples whose request fails are left out of the accuracy and
    counted in results["errors"].

    Args:
        trainer: Object providing `evaluate(image_b64, question)`.
        samples: List of sample dicts ("img_path", "question", "answer",
            "type", "image_id").
        name: Label used in the printed report.
        max_samples: If given, only the first `max_samples` samples are used.

    Returns:
        (results, acc): `results` holds "correct", "total", "errors",
        "by_type" (per question type counts) and up to 20 example
        "predictions"; `acc` is correct / total over scored samples.
    """
    print(f"\n{'='*60}")
    print(f"EVALUATING: {name}")
    print(f"{'='*60}")

    results = {
        "correct": 0,
        "total": 0,
        "errors": 0,
        "by_type": defaultdict(lambda: {"correct": 0, "total": 0}),
        "predictions": []
    }

    # `is None` rather than truthiness, so max_samples=0 evaluates nothing
    # instead of the whole dataset.
    eval_samples = samples if max_samples is None else samples[:max_samples]

    for sample in tqdm(eval_samples, desc=f"Evaluating {name}"):
        try:
            image_b64 = encode_image_to_base64(sample["img_path"])
            predicted = trainer.evaluate(image_b64, sample["question"])

            is_correct = norm(predicted) == norm(sample["answer"])

            results["correct"] += int(is_correct)
            results["total"] += 1
            results["by_type"][sample["type"]]["correct"] += int(is_correct)
            results["by_type"][sample["type"]]["total"] += 1

            # Keep only a small sample of predictions for inspection.
            if len(results["predictions"]) < 20:
                results["predictions"].append({
                    "question": sample["question"],
                    "predicted": predicted,
                    "ground_truth": sample["answer"],
                    "correct": is_correct
                })

            time.sleep(0.05)

        except Exception as e:
            # Record the failure so a shrunken denominator is visible in the
            # report rather than silently inflating or deflating accuracy.
            results["errors"] += 1
            print(f"\nError on {sample['image_id']}: {e}")

    acc = results["correct"] / results["total"] if results["total"] > 0 else 0.0

    print(f"\n{name} Results:")
    print(f"  Overall Accuracy: {acc:.4f} ({results['correct']}/{results['total']})")
    if results["errors"]:
        print(f"  Skipped (request failed): {results['errors']}")
    print(f"\n  By Question Type:")

    for qt, m in sorted(results["by_type"].items()):
        type_acc = m["correct"] / m["total"] if m["total"] > 0 else 0.0
        print(f"    {qt}: {type_acc:.4f} ({m['correct']}/{m['total']})")

    return results, acc

print("Evaluation function defined!")

## Step 11: Initialize Trainer & Create Adapter

This creates a new LoRA adapter for fine-tuning.

In [None]:
# Build the API client with the key loaded in Step 3.
trainer = MoondreamTrainer(MOONDREAM_API_KEY)

# Create a new adapter through the API. The returned adapter id is stored on
# `trainer` and, as the cell's last expression, is also shown as cell output.
# Re-running this cell creates another adapter.
trainer.create_adapter(
    name="floodnet-exp1-dense-baseline",
    rank=32
)

## Step 12: Baseline Evaluation (Before Training)

Let's see how the model performs before fine-tuning. This cell evaluates only the first 200 test samples.

In [None]:
# Score the untrained adapter on the first 200 test samples; this is the
# reference point for the post-training comparison.
baseline_results, baseline_acc = evaluate(
    trainer,
    test_samples,
    name="Baseline",
    max_samples=200
)

print(f"\n📊 Baseline accuracy: {baseline_acc:.4f}")

In [None]:
# Debug: show up to 10 stored baseline predictions next to their ground truth,
# both raw and after norm(). Accuracy compares the normalized strings, so this
# reveals why an answer was marked wrong.
print("Sample predictions vs ground truth:\n")
for p in baseline_results["predictions"][:10]:
    print(f"Q: {p['question']}")
    print(f"  Predicted:    '{p['predicted']}'")
    print(f"  Ground truth: '{p['ground_truth']}'")
    print(f"  Normalized P: '{norm(p['predicted'])}'")
    print(f"  Normalized GT: '{norm(p['ground_truth'])}'")
    print()

## Step 13: Train the Model

You can adjust:
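- `epochs`: Number of training rounds; each round draws a fresh random subset of the training data rather than passing over all of it
- `samples_per_epoch`: How many samples are drawn per epoch (capped at the size of the training set)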
- `batch_size`: Samples per training step

In [None]:
# Run the RL loop; `history` receives one summary dict per epoch.
history = train_rl(
    trainer,
    train_samples,
    epochs=3,              # number of epochs
    batch_size=8,          # samples per training step
    samples_per_epoch=500, # samples drawn per epoch
    num_rollouts=4,        # answer attempts per sample
    lr=0.002               # learning rate
)

## Step 14: Final Evaluation (After Training)

In [None]:
# Evaluate on the same first-200 test subset as the baseline cell so that
# "Improvement" compares like with like. To report on the full test set,
# change max_samples in BOTH cells.
test_results, test_acc = evaluate(
    trainer,
    test_samples,
    name="After Training",
    max_samples=200
)

print(f"\n{'='*60}")
print("COMPARISON")
print(f"{'='*60}")
print(f"Baseline accuracy: {baseline_acc:.4f}")
print(f"Final accuracy:    {test_acc:.4f}")
print(f"Improvement:       {test_acc - baseline_acc:+.4f}")

## Step 15: Save & Download Results

In [None]:
# Collect the experiment summary into one JSON-serializable dict.
# NOTE(review): the hyperparameters under "training" are written by hand and
# are not read from the train_rl(...) call; keep the two in sync.
results = {
    "experiment": "Experiment 1: Dense Baseline",
    "model": "Moondream 2B + LoRA (rank=32)",
    "adapter_id": trainer.adapter_id,
    "training": {
        "epochs": 3,
        "samples_per_epoch": 500,
        "batch_size": 8,
        "num_rollouts": 4,
        "learning_rate": 0.002,
        "final_step": trainer.current_step,
        "history": history
    },
    "results": {
        "baseline_accuracy": float(baseline_acc),
        "final_accuracy": float(test_acc),
        "improvement": float(test_acc - baseline_acc)
    },
    # Per-question-type accuracy from the post-training evaluation, rebuilt as
    # plain dicts.
    "by_question_type": {
        qt: {
            "accuracy": m["correct"] / m["total"] if m["total"] > 0 else 0,
            "correct": m["correct"],
            "total": m["total"]
        }
        for qt, m in test_results["by_type"].items()
    },
    # evaluate() stores at most 20 example predictions.
    "sample_predictions": test_results["predictions"]
}

# Write the summary to disk; the next cell downloads this file.
output_file = "/content/experiment1_results.json"
with open(output_file, "w") as f:
    json.dump(results, f, indent=2)

print(f"Results saved to {output_file}")
print(json.dumps(results["results"], indent=2))

In [None]:
from google.colab import files
# Hand the results file written by the previous cell to Colab's download helper.
files.download('/content/experiment1_results.json')