In [1]:
!pip install -U google-generativeai




In [2]:
import os
print("KEY PRESENT:", "GOOGLE_API_KEY" in os.environ)


KEY PRESENT: False


In [3]:
# # Cell 1: Setup and Install

!pip install -q google-generativeai pillow imageio imageio-ffmpeg decord tqdm datasets
import google.generativeai as genai, os, json, torch

GEMINI_API_KEY = "AIzaSyAmEQXt2uK_SZKQ6ghSk3ROoNE-s97jny0" ## Insert here
genai.configure(api_key=GEMINI_API_KEY)


In [4]:

# Print the name and supported generation methods of every model returned by
# genai.list_models() for the configured API key.
for model_info in genai.list_models():
    methods = model_info.supported_generation_methods
    print(f"Name: {model_info.name}, Supported Generation Methods: {methods}")

Name: models/embedding-gecko-001, Supported Generation Methods: ['embedText', 'countTextTokens']
Name: models/gemini-2.5-pro-preview-03-25, Supported Generation Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Name: models/gemini-2.5-flash, Supported Generation Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Name: models/gemini-2.5-pro-preview-05-06, Supported Generation Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Name: models/gemini-2.5-pro-preview-06-05, Supported Generation Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Name: models/gemini-2.5-pro, Supported Generation Methods: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
Name: models/gemini-2.0-flash-exp, Supported Generation Methods: ['generateContent', 'countTokens', 'bidiGenerateContent']
Name: models/gemini-2.0-flash, 

In [6]:
model = genai.GenerativeModel("gemini-2.5-pro")


In [7]:
model.generate_content("hi").text


'Hello! How can I help you today?'

In [5]:
!du -sh /content/camerabench


du: cannot access '/content/camerabench': No such file or directory


In [6]:
# --- Colab: Create folders ---
import os, json

# Working directories on the Colab VM:
#   ROOT - top-level workspace
#   VIDS - downloaded GIFs and their MP4 conversions
#   OUTS - manifests and other generated outputs
ROOT = "/content/camerabench"
VIDS = ROOT + "/videos_gif_mp4"
OUTS = ROOT + "/outputs"

# Create every directory; exist_ok makes the cell safe to re-run.
for directory in [ROOT, VIDS, OUTS]:
    os.makedirs(directory, exist_ok=True)

for label, directory in (("Root:", ROOT), ("Videos:", VIDS), ("Outputs:", OUTS)):
    print(label, directory)

Root: /content/camerabench
Videos: /content/camerabench/videos_gif_mp4
Outputs: /content/camerabench/outputs


In [7]:
# --- PATCH: re-define GIF‚ÜíMP4 utils with even-dimension padding ---
import imageio, imageio.v3 as iio
import numpy as np, os, warnings
from typing import Tuple, List
from PIL import Image
from decord import VideoReader, cpu

def to_rgb_frame(arr: np.ndarray) -> np.ndarray:
    """Normalize one decoded frame to a channels-last, 3-channel uint8 array.

    Accepts a NumPy array (2-D grayscale, or N-D with channels last) or a
    PIL image. Frames with an alpha channel are composited over black in
    BOTH paths, so the result does not depend on which decoder produced them.

    Args:
        arr: Frame as ``np.ndarray`` or ``PIL.Image.Image``.

    Returns:
        ``np.ndarray`` of dtype uint8 whose last axis has length 3.
    """
    if not isinstance(arr, np.ndarray):
        # PIL-safe conversion path
        if isinstance(arr, Image.Image):
            if arr.mode in ("RGBA", "LA"):
                rgba = arr.convert("RGBA")
                # The background must be OPAQUE black. Compositing onto a
                # transparent canvas leaves the pixels unchanged, and the
                # following convert("RGB") would then just drop the alpha
                # channel instead of blending transparent areas to black.
                bg = Image.new("RGBA", rgba.size, (0, 0, 0, 255))
                bg.alpha_composite(rgba)
                arr = bg.convert("RGB")
            else:
                arr = arr.convert("RGB")
            return np.array(arr, dtype=np.uint8)
        # Any other array-like (e.g. nested lists) goes through the NumPy path.
        arr = np.asarray(arr)

    # NumPy path
    if arr.dtype != np.uint8:
        # Replace NaN/inf before the cast: converting them to uint8 is
        # undefined and makes NumPy emit "invalid value" warnings.
        a = np.nan_to_num(arr.astype(np.float32), nan=0.0, posinf=255.0, neginf=0.0)
        arr = np.clip(a, 0, 255).astype(np.uint8)

    if arr.ndim == 2:  # gray -> RGB
        arr = np.stack([arr, arr, arr], axis=-1)

    if arr.shape[-1] == 4:  # RGBA -> RGB (alpha over black)
        alpha = arr[..., 3:4].astype(np.float32) / 255.0
        arr = (arr[..., :3].astype(np.float32) * alpha).astype(np.uint8)

    if arr.shape[-1] != 3:
        # Any other channel count: replicate the first channel three times.
        first = arr[..., 0]
        arr = np.stack([first, first, first], axis=-1).astype(np.uint8)

    return arr

def _ensure_same_size(frames: List[np.ndarray]) -> Tuple[List[np.ndarray], Tuple[int, int]]:
    """Resize all frames to the first frame's WxH (consistent encoder input)."""
    if not frames:
        return frames, (0, 0)
    h, w = frames[0].shape[:2]
    out = []
    for f in frames:
        if f.shape[0] != h or f.shape[1] != w:
            pil = Image.fromarray(f)
            pil = pil.resize((w, h), resample=Image.Resampling.BILINEAR)
            f = np.array(pil, dtype=np.uint8)
        out.append(f)
    return out, (h, w)

def _pad_to_even(frames: List[np.ndarray]) -> Tuple[List[np.ndarray], Tuple[int, int]]:
    """Pad frames on the right/bottom by 1 pixel if width or height is odd (needed for yuv420p)."""
    if not frames:
        return frames, (0, 0)
    h, w = frames[0].shape[:2]
    pad_h = h % 2
    pad_w = w % 2
    if pad_h == 0 and pad_w == 0:
        return frames, (h, w)

    H, W = h + pad_h, w + pad_w
    out = []
    for f in frames:
        canvas = np.zeros((H, W, 3), dtype=np.uint8)
        canvas[:h, :w, :] = f
        out.append(canvas)
    return out, (H, W)

def decord_ok(path: str) -> bool:
    """Report whether `path` is a video file that decord can actually read.

    The file must exist, be non-empty, open in a CPU ``VideoReader``, contain
    at least one frame, and allow frame 0 to be fetched. Any exception raised
    along the way is treated as "not readable".

    Returns:
        True only if every check passes; False otherwise.
    """
    try:
        if not os.path.exists(path):
            return False
        if os.path.getsize(path) <= 0:
            return False

        reader = VideoReader(path, ctx=cpu(0))
        if len(reader) >= 1:
            # Fetch the first frame so that a container which opens but
            # cannot be decoded is rejected too.
            reader[0]
            return True
        return False
    except Exception:
        return False

def convert_gif_to_mp4(gif_path: str, mp4_path: str, fps: float = 8.0) -> bool:
    """
    Re-encode a GIF as an H.264 MP4 and verify the result.

    Steps:
      - decode every GIF frame and normalize it to RGB
      - force all frames to the first frame's size
      - pad to even dimensions (needed for yuv420p)
      - encode with macro_block_size=1 so imageio does not resize implicitly
      - confirm the written file with decord_ok

    Args:
        gif_path: Source GIF file.
        mp4_path: Destination MP4 file.
        fps: Frame rate written to the MP4.

    Returns:
        True if the MP4 was written and passes decord_ok; False if no frames
        were decoded or any step raised (a warning is emitted in both cases).
    """
    try:
        with imageio.get_reader(gif_path) as gif:
            rgb_frames = [to_rgb_frame(raw) for raw in gif]

        if len(rgb_frames) == 0:
            warnings.warn(f"No frames decoded from GIF: {gif_path}")
            return False

        rgb_frames = _ensure_same_size(rgb_frames)[0]
        rgb_frames = _pad_to_even(rgb_frames)[0]  # critical fix for yuv420p

        encoder_options = dict(
            fps=fps,
            macro_block_size=1,
            codec="libx264",
            format="FFMPEG",
            ffmpeg_params=["-pix_fmt", "yuv420p", "-movflags", "+faststart"],
        )
        imageio.mimsave(mp4_path, rgb_frames, **encoder_options)
        return decord_ok(mp4_path)
    except Exception as err:
        warnings.warn(
            f"GIF‚ÜíMP4 failed for {gif_path} -> {mp4_path}: {err}\n\n"
            "Tip: this is often due to odd frame sizes with yuv420p; padding to even dims usually fixes it."
        )
        return False

In [8]:
# --- Colab: Load dataset & build manifest (download GIFs, convert to MP4) ---
import os, json, time, hashlib, requests
from datasets import load_dataset
from tqdm import tqdm

MANIFEST_PATH = f"{OUTS}/gif2mp4_manifest.json"

# Load split="test"
ds = load_dataset("syCen/CameraBench", split="test")

def safe_filename(url: str) -> str:
    """Build a deterministic local base name from a URL.

    The name is ``<first 16 hex chars of md5(url)>_<last path segment>``.
    The hash covers the full URL, so two URLs that share a file name but
    differ elsewhere still map to different local files.

    Args:
        url: Source URL (the query string and fragment are ignored when
            extracting the file name, but still contribute to the hash).

    Returns:
        The base name; the caller adds an extension if needed.
    """
    h = hashlib.md5(url.encode("utf-8")).hexdigest()[:16]
    # Strip the fragment and query BEFORE taking the basename; otherwise a
    # "/" inside the query (e.g. "?next=/a/b") would be mistaken for a path
    # separator and yield the wrong name.
    path_part = url.split("#", 1)[0].split("?", 1)[0]
    base = os.path.basename(path_part)
    return f"{h}_{base}"

def download_gif(url: str, out_path: str, timeout=20, retries=3) -> bool:
    """Download `url` to `out_path`, retrying with a growing delay.

    The body is streamed into a temporary ``<out_path>.part`` file and only
    moved to `out_path` once a non-empty download has completed. A failed or
    interrupted attempt therefore never leaves a truncated file at `out_path`
    that a later run could mistake for a finished download.

    Args:
        url: Remote file to fetch.
        out_path: Final destination path.
        timeout: Timeout in seconds passed to ``requests.get``.
        retries: Maximum number of attempts.

    Returns:
        True if a non-empty file now exists at `out_path`, False if every
        attempt failed (non-200 status, empty body, or any exception).
    """
    tmp_path = out_path + ".part"
    for attempt in range(retries):
        try:
            # Context manager releases the streamed connection on every path.
            with requests.get(url, stream=True, timeout=timeout) as r:
                if r.status_code == 200:
                    with open(tmp_path, "wb") as f:
                        for chunk in r.iter_content(chunk_size=1024 * 256):
                            if chunk:
                                f.write(chunk)
                    if os.path.getsize(tmp_path) > 0:
                        os.replace(tmp_path, out_path)
                        return True
        except Exception:
            # Best-effort download: any failure just counts as a failed attempt.
            pass
        finally:
            # Remove a leftover partial file (absent after a successful move).
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

        # Back off before the next attempt, for errors and bad statuses alike;
        # no pointless sleep after the final attempt.
        if attempt < retries - 1:
            time.sleep(1.0 * (attempt + 1))
    return False

# Build one manifest entry per dataset row that has a GIF URL. Every entry
# records where the GIF/MP4 live locally plus a "status" of "ok",
# "mp4_unreadable" or "gif_download_failed".
manifest = []
for idx, row in enumerate(tqdm(ds, desc="Building GIF‚ÜíMP4 manifest")):
    gif_url = row.get("video", None) or row.get("Video", None)
    rel_path = row.get("path", None)  # relative mp4 path/id in dataset metadata
    if not gif_url:
        continue

    gif_name = safe_filename(gif_url)
    local_gif = os.path.join(VIDS, gif_name if gif_name.lower().endswith(".gif") else gif_name + ".gif")
    local_mp4 = os.path.splitext(local_gif)[0] + ".mp4"

    # Fields shared by every outcome; "status" is appended last below.
    entry = {
        "row_idx": idx,
        "rel_path": rel_path,
        "gif_url": gif_url,
        "local_gif": local_gif,
        "local_mp4": local_mp4,
    }

    # Download if not present. A failed download is still recorded in the
    # manifest (with a failure status) so it can be found and retried later.
    if not os.path.exists(local_gif) and not download_gif(gif_url, local_gif):
        manifest.append({**entry, "status": "gif_download_failed"})
        continue

    # (Re)convert whenever the MP4 is missing OR present but unreadable.
    # Checking only for existence would keep a corrupt MP4 from an earlier
    # interrupted run forever. convert_gif_to_mp4 returns decord_ok() of its
    # output, so no second readability check is needed.
    readable = decord_ok(local_mp4) or convert_gif_to_mp4(local_gif, local_mp4, fps=8.0)
    manifest.append({**entry, "status": "ok" if readable else "mp4_unreadable"})

# Save manifest
with open(MANIFEST_PATH, "w") as f:
    json.dump(manifest, f, indent=2)

print(f"Saved manifest: {MANIFEST_PATH}")
print("Total items:", len(manifest))
print("Readable MP4s:", sum(1 for m in manifest if m["status"] == "ok"))
print("\nSample manifest entry:")
# Prefer a readable entry as the sample; fall back to the first entry (or {}).
print(json.dumps(next((m for m in manifest if m["status"] == "ok"), manifest[0] if manifest else {}), indent=2))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

test.jsonl: 0.00B [00:00, ?B/s]

Generating test split:   0%|          | 0/1071 [00:00<?, ? examples/s]

  alpha = (arr[..., 3:4].astype(np.float32) / 255.0)
  rgb = (rgb * alpha).astype(np.uint8)
Building GIF‚ÜíMP4 manifest: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1071/1071 [32:13<00:00,  1.81s/it]

Saved manifest: /content/camerabench/outputs/gif2mp4_manifest.json
Total items: 1071
Readable MP4s: 1071

Sample manifest entry:
{
  "row_idx": 0,
  "rel_path": "videos/-2uIa-XMJC0.5.3.mp4",
  "gif_url": "https://huggingface.co/datasets/syCen/CameraBench/resolve/main/videos_gif/-2uIa-XMJC0.5.3.gif",
  "local_gif": "/content/camerabench/videos_gif_mp4/3e1afda20f104270_-2uIa-XMJC0.5.3.gif",
  "local_mp4": "/content/camerabench/videos_gif_mp4/3e1afda20f104270_-2uIa-XMJC0.5.3.mp4",
  "status": "ok"
}





In [8]:
# --- Cell 6: Sanity test using Gemini ---
import pathlib, json, os, time

ROOT = "/content/camerabench"
MANIFEST_PATH = f"{ROOT}/outputs/gif2mp4_manifest.json"

# Load manifest
with open(MANIFEST_PATH, "r") as f:
    manifest = json.load(f)

test_items = [m for m in manifest if m["status"] == "ok"][:5]
print(f"Testing on {len(test_items)} videos\n")

# Helper
def generate_caption_for_path(video_path: str, prompt=None) -> str:
    """Send one MP4 and a text prompt to the notebook-level `model`.

    The whole file is read into memory and passed inline as a
    ``video/mp4`` part, followed by the prompt, in a single user turn.

    Args:
        video_path: Path of the MP4 to caption.
        prompt: Instruction text; defaults to "Describe scenes in detail."

    Returns:
        The response text with surrounding whitespace removed ("" if the
        response text is empty).

    Errors from reading the file or from the API call are not caught here.
    """
    if not prompt:
        prompt = "Describe scenes in detail."

    video_part = {"mime_type": "video/mp4", "data": pathlib.Path(video_path).read_bytes()}
    user_turn = {"role": "user", "parts": [video_part, prompt]}

    reply = model.generate_content([user_turn], request_options={"timeout": 180})
    text = reply.text or ""
    return text.strip()

# Smoke test: caption each selected video once and print the elapsed time.
# A failure on one video is reported and the loop moves on to the next.
for m in test_items:
    vpath = m["local_mp4"]
    print(f"üé• {os.path.basename(vpath)}")
    t0 = time.time()
    try:
        cap = generate_caption_for_path(vpath)
    except Exception as e:
        print(f"‚ùå Error on {vpath}: {e}\n")
    else:
        print(f"‚è±Ô∏è {time.time() - t0:.2f}s | {cap}\n")

print("‚úÖ Sanity test complete.")


Testing on 5 videos

üé• 3e1afda20f104270_-2uIa-XMJC0.5.3.mp4
‚è±Ô∏è 10.48s | A detailed description of the video scenes is as follows:

**00:00 - 00:02**
The video opens with a high-angle shot looking down at a person dressed in all-white athletic wear, including a beanie, standing on a skateboard. They are positioned on a narrow ledge high up on a modern, dark building with large glass panels. In a swift, daring move, the person pushes off the ledge and drops down the side of the structure, riding their skateboard vertically for a moment before landing on another surface below.

**00:02 - 00:05**
The camera angle switches to a dramatic, top-down, bird's-eye view. The person is now smoothly skateboarding across a long, transparent glass bridge or walkway, suspended high in the air. Below the clear glass, the ground and some foliage are visible far below, creating a dizzying sense of height. As the person continues to ride forward, the camera slowly pulls upward, emphasizing the vastn

In [31]:
try:
    r = model.generate_content("Say 'Gemini is working.'")
    print("RESPONSE:", r.text)
except Exception as e:
    print("‚ùå TEXT TEST FAILED:", e)


KeyboardInterrupt: 

In [28]:
model

genai.GenerativeModel(
    model_name='models/gemini-2.5-pro',
    generation_config={},
    safety_settings={},
    tools=None,
    system_instruction=None,
    cached_content=None
)

In [None]:
!mv "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_gif.jsonl" "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_scene+motion_backup.jsonl"


In [12]:
import json

JSONL_PATH = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_motiononly.jsonl"

done = []
with open(JSONL_PATH, "r") as f:
    for line in f:
        try:
            rec = json.loads(line.strip())
            done.append(rec)
        except Exception:
            continue

print(f"‚úÖ Total completed videos: {len(done)}")
print("üîπ Example entries:")
for r in done[:5]:
    print(f"row_idx={r['row_idx']}, caption={r['caption_generated'][:60]}...")


FileNotFoundError: [Errno 2] No such file or directory: '/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_motiononly.jsonl'

In [13]:
from google.colab import auth
auth.authenticate_user()

import google.generativeai as genai
# genai.configure(api_key="YOUR_API_KEY")   # if needed
model = genai.GenerativeModel("gemini-2.5-pro")


MessageError: Error: credential propagation was unsuccessful

In [None]:
print("Copying locally for fast read...")
!cp "$JSONL_PATH" /content/tmp_motion.jsonl

done = set()
with open("/content/tmp_motion.jsonl", "r") as f:
    for line in f:
        try:
            rec = json.loads(line)
            done.add(int(rec["row_idx"]))
        except Exception:
            continue

In [None]:
print(f"‚úÖ Loaded {len(done)} entries.")


In [24]:
total_remaining

1066

In [10]:
# --- Cell 7: Chunked captioning with Gemini 2.5 Pro (scene description baseline) ---
from google.colab import drive
from tqdm import tqdm
from datasets import load_dataset
import os, json, time, pathlib

drive.mount('/content/drive')

ROOT = "/content/camerabench"
OUTS = f"{ROOT}/outputs"
MANIFEST_PATH = f"{OUTS}/gif2mp4_manifest.json"
DRIVE_FOLDER = "/content/drive/MyDrive/Deep Learning Fall 2025/Scene"
JSONL_PATH = f"{DRIVE_FOLDER}/motion_captions_gemini_scene.jsonl"

os.makedirs(DRIVE_FOLDER, exist_ok=True)

## MOVE IT LOWER
# VIDEOS_PER_CHUNK = 3
# VIDEOS_PER_CHUNK = total_remaining

print("Loading manifest and dataset...")
with open(MANIFEST_PATH, "r") as f:
    manifest = json.load(f)
ds = load_dataset("syCen/CameraBench", split="test")

# --- Fast local copy of progress file ---
done = set()
if os.path.exists(JSONL_PATH):
    print("Found existing progress file! Copying locally for fast read...")
    LOCAL_JSONL = "/content/temp_scene.jsonl"
    !cp "$JSONL_PATH" "$LOCAL_JSONL"

    with open(LOCAL_JSONL, "r") as f:
        for line in tqdm(f, desc="Loading completed entries"):
            try:
                rec = json.loads(line.strip())
                done.add(int(rec["row_idx"]))
            except Exception:
                continue

processable = [m for m in manifest if m.get("status") == "ok" and int(m["row_idx"]) not in done]

total_done = len(done)
total_remaining = len(processable)
total_videos = len([m for m in manifest if m.get("status") == "ok"])

# Code for VIDEOS PER CHUNK:
VIDEOS_PER_CHUNK = total_remaining


print(f"\n{'='*70}")
print(f"CAPTIONING STATUS (Google Drive)")
print(f"{'='*70}")
print(f"‚úÖ Already completed: {total_done} videos")
print(f"üìù Remaining: {total_remaining} videos")
print(f"üéØ Total readable videos: {total_videos}")
print(f"\n‚ñ∂Ô∏è  This run will process: {min(VIDEOS_PER_CHUNK, total_remaining)} videos")
print(f"üíæ Progress file: {JSONL_PATH}")
print(f"{'='*70}\n")

def caption_video_gemini(video_path: str, prompt=None) -> str:
    """Caption one video with the notebook-level `model` (scene description by default).

    The MP4 is read fully into memory and sent inline as a ``video/mp4`` part
    together with the prompt. Failures are never raised to the caller: a
    warning is printed and an empty string is returned instead.

    Args:
        video_path: Path of the MP4 to caption.
        prompt: Instruction text; defaults to "Describe scenes in detail."

    Returns:
        The stripped response text, or "" if the file could not be read, the
        request failed, or the response text was empty.
    """
    prompt = prompt or "Describe scenes in detail."

    try:
        payload = pathlib.Path(video_path).read_bytes()
    except Exception as e:
        print(f"‚ö†Ô∏è Could not read {video_path}: {e}")
        return ""

    request = [
        {
            "role": "user",
            "parts": [{"mime_type": "video/mp4", "data": payload}, prompt],
        }
    ]

    try:
        reply = model.generate_content(request, request_options={"timeout": 180})
        caption = (reply.text or "").strip()
    except Exception as e:
        print(f"‚ö†Ô∏è Error processing {os.path.basename(video_path)}: {e}")
        return ""
    return caption

# --- Process chunk ---
# Captions up to VIDEOS_PER_CHUNK pending videos and appends one JSON record
# per successful caption to JSONL_PATH. Because the file is opened in append
# mode and `done` was loaded from it earlier, the cell can be re-run to resume.
if total_remaining == 0:
    print("üéâ All videos already captioned! Run Cell 9 to export to Excel.")
else:
    chunk_to_process = processable[:VIDEOS_PER_CHUNK]
    # ok_cnt: records written this run; skip_cnt: empty captions or failures.
    ok_cnt, skip_cnt = 0, 0

    with open(JSONL_PATH, "a") as jf:
        for m in tqdm(chunk_to_process, desc=f"Captioning (chunk of {len(chunk_to_process)})"):
            idx = int(m["row_idx"])
            # Guard against an index that was completed earlier in this run.
            if idx in done:
                tqdm.write(f"‚è≠Ô∏è Skipping already-completed video {idx}")
                continue

            vpath = m["local_mp4"]
            try:
                start = time.time()
                cap = caption_video_gemini(vpath)
                duration = time.time() - start
                # caption_video_gemini returns "" on any failure; such videos
                # are not written and not marked done, so a later run retries them.
                if not cap:
                    skip_cnt += 1
                    continue

                # Pull the reference fields for this row from the dataset.
                row = ds[idx]
                rec = {
                    "row_idx": idx,
                    "id_or_video_path": row.get("path"),
                    "video_link": m.get("gif_url"),
                    "local_mp4": vpath,
                    "caption_generated": cap,
                    "labels": row.get("labels"),
                    "human_motion_caption": row.get("caption"),
                    "model": "models/gemini-2.5-pro",
                    "runtime_seconds": round(duration, 2),
                    "source": "gif_preview_converted_to_mp4",
                    "task": "scene_baseline"
                }

                jf.write(json.dumps(rec, ensure_ascii=False) + "\n")
                # jf.flush()
                # Flush on the 1st, 11th, 21st, ... successful record rather
                # than on every write.
                if ok_cnt % 10 == 0:
                    jf.flush()
                ok_cnt += 1
                done.add(idx)
                # Short pause between requests.
                time.sleep(0.1)

            except Exception as e:
                # Any unexpected error counts as a skip; the loop continues.
                tqdm.write(f"‚ùå Failed video {idx}: {e}")
                skip_cnt += 1

        # Final flush so records written since the last periodic flush are pushed out.
        jf.flush()

    print(f"\n{'='*70}")
    print(f"‚úÖ CHUNK COMPLETE - SAVED TO GOOGLE DRIVE!")
    print(f"{'='*70}")
    print(f"Successfully captioned this run: {ok_cnt} videos")
    print(f"Errors/skipped this run: {skip_cnt} videos")
    print(f"üíæ Progress saved to: {JSONL_PATH}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loading manifest and dataset...
Found existing progress file! Copying locally for fast read...


Loading completed entries: 1050it [00:00, 73364.86it/s]



CAPTIONING STATUS (Google Drive)
‚úÖ Already completed: 1050 videos
üìù Remaining: 21 videos
üéØ Total readable videos: 1071

‚ñ∂Ô∏è  This run will process: 21 videos
üíæ Progress file: /content/drive/MyDrive/Deep Learning Fall 2025/Scene/motion_captions_gemini_scene.jsonl



Captioning (chunk of 21): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 21/21 [04:54<00:00, 14.00s/it]


‚úÖ CHUNK COMPLETE - SAVED TO GOOGLE DRIVE!
Successfully captioned this run: 21 videos
Errors/skipped this run: 0 videos
üíæ Progress saved to: /content/drive/MyDrive/Deep Learning Fall 2025/Scene/motion_captions_gemini_scene.jsonl





In [18]:
!pip install xlsxwriter

Collecting xlsxwriter
  Downloading xlsxwriter-3.2.9-py3-none-any.whl.metadata (2.7 kB)
Downloading xlsxwriter-3.2.9-py3-none-any.whl (175 kB)
[?25l   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m0.0/175.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m175.3/175.3 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.2.9


In [11]:
# Cell 9: Export to Excel

from google.colab import drive
import pandas as pd
import json
import os

# Mount Google Drive
drive.mount('/content/drive')

# JSONL_PATH = "/content/camerabench/outputs/motion_captions_gemini_motiononly_deduped.jsonl"
JSONL_PATH = "/content/drive/MyDrive/Deep Learning Fall 2025/Scene/motion_captions_gemini_scene.jsonl"

SAVE_XLSX = "/content/drive/MyDrive/Deep Learning Fall 2025/Scene/scene_captions_gemini.xlsx"

# Record fields copied into the spreadsheet, in column order. Fields missing
# from a record become empty cells.
EXPORT_FIELDS = (
    "row_idx",
    "video_link",
    "caption_generated",
    "id_or_video_path",
    "labels",
    "human_motion_caption",
)

print("Reading JSONL file...")
rows = []
with open(JSONL_PATH, "r") as f:
    for line in f:
        s = line.strip()
        if s:
            # Report and skip lines that cannot be parsed into a record.
            try:
                obj = json.loads(s)
                rows.append({field: obj.get(field) for field in EXPORT_FIELDS})
            except Exception as e:
                print(f"‚ö†Ô∏è  Skipping malformed line: {e}")

# Create DataFrame
df = pd.DataFrame(rows)

# Save to Excel
print("Saving to Excel...")
os.makedirs(os.path.dirname(SAVE_XLSX), exist_ok=True)

with pd.ExcelWriter(SAVE_XLSX, engine="xlsxwriter") as writer:
    df.to_excel(writer, index=False, sheet_name="captions")

    # Size each column to its longest cell or header (+2 padding), capped at 50.
    worksheet = writer.sheets["captions"]
    for col_pos, col_name in enumerate(df.columns):
        longest_cell = df[col_name].astype(str).map(len).max()
        width = max(longest_cell, len(col_name)) + 2
        worksheet.set_column(col_pos, col_pos, min(width, 50))

banner = "=" * 70
print(f"\n{banner}")
print("‚úÖ EXCEL EXPORT COMPLETE!")
print(banner)
print(f"Total captions: {len(df)}")
print(f"Saved to: {SAVE_XLSX}")
print("\nüìä Preview:")
print(df.head(3)[["row_idx", "caption_generated"]])

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Reading JSONL file...
Saving to Excel...

‚úÖ EXCEL EXPORT COMPLETE!
Total captions: 1071
Saved to: /content/drive/MyDrive/Deep Learning Fall 2025/Scene/scene_captions_gemini.xlsx

üìä Preview:
   row_idx                                  caption_generated
0        0  The video features a series of short, dynamic ...
1        1  A young woman stands outdoors in a peaceful, w...
2        2  This video shows a single, dramatic scene from...


In [None]:
# --- Colab: Progress polling & peek ---
!wc -l /content/camerabench/outputs/motion_captions_qwen_gif.jsonl || echo "No JSONL yet."

# Print first 3 non-empty, well-formed lines
import json
path = "/content/camerabench/outputs/motion_captions_qwen_gif.jsonl"
try:
    with open(path, "r") as f:
        shown = 0
        for line in f:
            s = line.strip()
            if not s:
                continue
            try:
                obj = json.loads(s)
                print(json.dumps(obj, indent=2, ensure_ascii=False))
                shown += 1
                if shown >= 3:
                    break
            except Exception:
                continue
except FileNotFoundError:
    print("JSONL not found.")

In [None]:
!ls

In [None]:
!ls /content/drive/MyDrive/camerabench_full_backup/outputs

In [None]:
# --- Colab: Export to Excel (Drive) ---
from google.colab import drive
drive.mount('/content/drive')

# json and os are imported here so this cell no longer depends on imports
# made by earlier cells.
import json
import os

import pandas as pd

JSONL_PATH = "/content/drive/MyDrive/camerabench_full_backup/outputs/motion_captions_qwen_gif.jsonl"
# JSONL_PATH = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_qwen_gif.jsonl"
SAVE_XLSX = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_qwen_2.xlsx"

# Read one record per non-empty line. Malformed lines are skipped, but they
# are counted so that missing rows in the export do not go unnoticed.
rows = []
skipped = 0
with open(JSONL_PATH, "r") as f:
    for line in f:
        s = line.strip()
        if not s:
            continue
        try:
            obj = json.loads(s)
            rows.append({
                "video_link": obj.get("video_link"),
                "caption_generated": obj.get("caption_generated"),
                "id_or_video_path": obj.get("id_or_video_path"),
            })
        except Exception:
            skipped += 1

if skipped:
    print(f"Skipped {skipped} malformed line(s) in {JSONL_PATH}")

# Explicit column list keeps the header row even when no records were read.
df = pd.DataFrame(rows, columns=["video_link", "caption_generated", "id_or_video_path"])
os.makedirs(os.path.dirname(SAVE_XLSX), exist_ok=True)
with pd.ExcelWriter(SAVE_XLSX, engine="xlsxwriter") as writer:
    df.to_excel(writer, index=False, sheet_name="captions")

print("Saved Excel to:", SAVE_XLSX)
print(df.head(3))

# Calculate BERT SCORE

In [None]:
!pip install bert-score pandas


In [None]:
# --- Cell 10: Compute and Save BERTScore (from motion_captions_qwen.xlsx) ---
!pip install -q bert-score pandas openpyxl

import pandas as pd
from bert_score import score

# Paths
EXCEL_PATH = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_qwen.xlsx"
SAVE_PATH  = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_bertscore.xlsx"

# --- Load Excel ---
df = pd.read_excel(EXCEL_PATH)
print(f"‚úÖ Loaded {len(df)} rows from {EXCEL_PATH}")

# --- Extract captions for scoring ---
preds = df["caption_generated"].astype(str).tolist()
refs  = df["human_motion_caption"].astype(str).tolist()

# --- Compute BERTScore (default: roberta-large) ---
P, R, F1 = score(preds, refs, lang="en")

# --- Add scores as new columns ---
df["bertscore_precision"] = P.tolist()
df["bertscore_recall"]    = R.tolist()
df["bertscore_f1"]        = F1.tolist()

# --- Compute and display averages ---
precision_mean = P.mean().item()
recall_mean = R.mean().item()
f1_mean = F1.mean().item()

print("\n================ BERTScore (Gemini Captions) ================")
print(f"Precision: {precision_mean:.4f}")
print(f"Recall:    {recall_mean:.4f}")
print(f"F1:        {f1_mean:.4f}")
print("============================================================")

# --- Match collaborator‚Äôs column order ---
cols_order = [
    "video_link",
    "caption_generated",
    "human_motion_caption",
    "bertscore_precision",
    "bertscore_recall",
    "bertscore_f1",
    "labels",
    "id_or_video_path",
]

# Fill missing columns with blanks if necessary
for col in cols_order:
    if col not in df.columns:
        df[col] = ""

df = df[cols_order]

# --- Save to Excel ---
df.to_excel(SAVE_PATH, index=False)
print(f"üíæ Saved detailed BERTScore results to {SAVE_PATH}")


In [None]:

import pandas as pd
from bert_score import score
import os

# Paths
# NOTE(review): the input file is named "qwen" while the output file and the
# printed banner say "Gemini" - confirm which model's captions are intended.
EXCEL_PATH = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_qwen.xlsx"
SAVE_PATH  = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_bertscore.xlsx"

# --- Load Excel ---
df = pd.read_excel(EXCEL_PATH)
print(f"‚úÖ Loaded {len(df)} rows from {EXCEL_PATH}")

# --- Candidate / reference caption pairs (every cell coerced to str) ---
preds = list(df["caption_generated"].astype(str))
refs  = list(df["human_motion_caption"].astype(str))

# --- Compute BERTScore (lang="en" selects the library's default English model) ---
P, R, F1 = score(preds, refs, lang="en", verbose=True)

# --- Per-row scores, rounded to 10 decimals ---
score_columns = {
    "bertscore_precision": P,
    "bertscore_recall": R,
    "bertscore_f1": F1,
}
for column_name, tensor in score_columns.items():
    df[column_name] = [round(value, 10) for value in tensor.tolist()]

# --- Corpus-level averages ---
precision_mean, recall_mean, f1_mean = (t.mean().item() for t in (P, R, F1))

print("\n================ BERTScore (Gemini Captions) ================")
print(f"Precision: {precision_mean:.4f}")
print(f"Recall:    {recall_mean:.4f}")
print(f"F1:        {f1_mean:.4f}")
print("============================================================")

# --- Order columns like the collaborator's sheet ---
cols_order = [
    "row_idx",
    "video_link",
    "caption_generated",
    "human_motion_caption",
    "bertscore_precision",
    "bertscore_recall",
    "bertscore_f1",
    "labels",
    "id_or_video_path",
]
# Columns absent from the input are added as blanks so the selection below works.
for col in (c for c in cols_order if c not in df.columns):
    df[col] = ""

df = df[cols_order]

# --- Append a summary row: the label goes in row_idx, the means in the
# score columns; every other column is left empty for that row. ---
summary = {"row_idx": "AVERAGE"}
summary.update(
    bertscore_precision=round(precision_mean, 4),
    bertscore_recall=round(recall_mean, 4),
    bertscore_f1=round(f1_mean, 4),
)
df = pd.concat([df, pd.DataFrame([summary])], ignore_index=True)


In [None]:

!pip install -q xlsxwriter

# --- Save formatted Excel ---
os.makedirs(os.path.dirname(SAVE_PATH), exist_ok=True)
with pd.ExcelWriter(SAVE_PATH, engine="xlsxwriter") as writer:
    df.to_excel(writer, index=False, sheet_name="captions")

    worksheet = writer.sheets["captions"]

    # Auto-adjust column widths
    for i, col in enumerate(df.columns):
        max_len = max(df[col].astype(str).map(len).max(), len(col)) + 2
        worksheet.set_column(i, i, min(max_len, 70))

print(f"\n‚úÖ Saved full BERTScore results to:\n{SAVE_PATH}")

In [None]:
import pandas as pd
import numpy as np

FILE_PATH = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_bertscore.xlsx"
SAVE_FIXED = "/content/drive/MyDrive/Deep Learning Fall 2025/motion_captions_gemini_bertscore_fixed.xlsx"

# --- Load file ---
df = pd.read_excel(FILE_PATH)
print(f"‚úÖ Loaded {len(df)} rows")

# --- Step 1: Build a numeric sort key WITHOUT overwriting the stored column ---
# Coercing df["row_idx"] in place would turn labels such as "AVERAGE" into NaN,
# and the saved file would lose them. The key is kept in a separate Series.
row_idx_num = pd.to_numeric(df["row_idx"], errors="coerce")

# Step 2: Identify any non-numeric or NaN row_idx rows (e.g., "AVERAGE" or blanks)
non_numeric = df[row_idx_num.isna()]
if not non_numeric.empty:
    print(f"‚ö†Ô∏è Found {len(non_numeric)} non-numeric row_idx rows (keeping them at end):")
    print(non_numeric.head(3))
else:
    print("‚úÖ All row_idx values are numeric.")

# --- Step 3: Sort numerically, keeping any NaN or text rows last ---
# The temporary key column is dropped again before saving.
df_sorted = (
    df.assign(_row_idx_num=row_idx_num)
    .sort_values(by="_row_idx_num", ascending=True, na_position="last")
    .drop(columns="_row_idx_num")
    .reset_index(drop=True)
)
sorted_num = pd.to_numeric(df_sorted["row_idx"], errors="coerce")

# --- Step 4: Check duplicates ---
# Only numeric indices are compared; several label/blank rows are not duplicates.
duplicates = df_sorted[sorted_num.notna() & sorted_num.duplicated(keep=False)]
if not duplicates.empty:
    print(f"‚ö†Ô∏è Warning: Found {len(duplicates)} duplicate row_idx entries.")
    print(duplicates[["row_idx", "id_or_video_path"]])
else:
    print("‚úÖ No duplicate row_idx values found.")

# Check if 699 is in correct position
if (sorted_num == 699).any():
    idx_699 = df_sorted.index[sorted_num == 699][0]
    print(f"‚úÖ Row 699 now appears at DataFrame index position: {idx_699}")
else:
    print("‚ö†Ô∏è Row 699 not found! Check for typos or missing data.")

# --- Step 5: Save clean file ---
with pd.ExcelWriter(SAVE_FIXED, engine="xlsxwriter") as writer:
    df_sorted.to_excel(writer, index=False, sheet_name="captions")
    worksheet = writer.sheets["captions"]
    # Size each column to its longest cell or header (+2 padding), capped at 70.
    for i, col in enumerate(df_sorted.columns):
        max_len = max(df_sorted[col].astype(str).map(len).max(), len(col)) + 2
        worksheet.set_column(i, i, min(max_len, 70))

print(f"\nüíæ Saved clean, sorted file to:\n{SAVE_FIXED}")


In [40]:
from datasets import load_dataset
ds = load_dataset("syCen/CameraBench", split="test")


In [42]:
video_bytes = ds[0]


In [43]:
video_bytes

{'Video': 'https://huggingface.co/datasets/syCen/CameraBench/resolve/main/videos_gif/-2uIa-XMJC0.5.3.gif',
 'labels': ['minimal-shaking', 'complex-motion', 'regular-speed', 'tilt-down'],
 'caption': 'The camera smoothly trucks slightly to the left, then quickly tilts downward before moving backward to follow the skateboarder, maintaining minimal shaking throughout.',
 'path': 'videos/-2uIa-XMJC0.5.3.mp4'}

In [44]:
!mkdir -p "/content/drive/MyDrive/camerabench"

!cp -r /content/camerabench/videos_gif_mp4 \
      "/content/drive/MyDrive/camerabench/videos_mp4"

!cp /content/camerabench/outputs/gif2mp4_manifest.json \
      "/content/drive/MyDrive/camerabench/manifest.json"
