# AIC-25 Colab: Textual KIS (Clean Flow)

Choose one path and follow the numbered steps.

- Path A — Quickstart (use provided features): simplest and fastest; set `PATH_CHOICE = 'quickstart'`
- Path B — Recompute (SigLIP2): higher quality; recomputes features and model; set `PATH_CHOICE = 'recompute'`

Steps overview
1) Clone repo
2) Install deps
3) Upload the dataset link CSV, then download the dataset
4A) Quickstart setup (Path A) — OR — 4B) Recompute SigLIP2 (Path B)
5) Start backend
6) Run a KIS query and export CSV
7) Zip for Codabench

In [None]:
# Choose your path: 'quickstart' or 'recompute'
PATH_CHOICE = 'recompute'  # <-- set to 'quickstart' for the simplest path

# Normalise case/whitespace and reject anything else up front. The recompute
# cell only runs when PATH_CHOICE == 'recompute', so a misspelt value would
# otherwise silently skip it.
PATH_CHOICE = PATH_CHOICE.strip().lower()
if PATH_CHOICE not in ('quickstart', 'recompute'):
    raise ValueError(
        f"PATH_CHOICE must be 'quickstart' or 'recompute', got {PATH_CHOICE!r}"
    )
print('Path:', PATH_CHOICE)


In [None]:
# Step 1) Clone the repo into /content/aic-25
# Any existing checkout at TARGET_DIR is deleted first so the clone is fresh.
import os, shutil, subprocess
REPO_URL = 'https://github.com/dnlqvu/hcm-AI-challenge-2024-main.git'
TARGET_DIR = '/content/aic-25'
# Run git from the parent directory: when this cell is re-run after a later
# `%cd /content/aic-25`, the kernel's working directory is the very folder
# removed below, and git cannot start from a deleted working directory.
PARENT_DIR = os.path.dirname(TARGET_DIR)
os.makedirs(PARENT_DIR, exist_ok=True)
if os.path.exists(TARGET_DIR):
    shutil.rmtree(TARGET_DIR)
print('Cloning', REPO_URL, '->', TARGET_DIR)
# check=True raises CalledProcessError if the clone fails.
subprocess.run(['git', 'clone', REPO_URL, TARGET_DIR], check=True, cwd=PARENT_DIR)
print(os.listdir(TARGET_DIR))


In [None]:
# Enter the cloned repo, then print the working directory and the first
# 50 lines of its top-level listing as a quick sanity check.
%cd /content/aic-25
!echo 'CWD:' && pwd && echo 'Top-level:' && ls -la | head -n 50


In [None]:
# Step 2) Install backend + extractor dependencies
%cd /content/aic-25/aic-24-BE
!python -m pip install --quiet --upgrade pip
!pip install --quiet -r requirements.txt
# Check if critical packages installed successfully
import sys
try:
    import uvicorn
    import fastapi
    print("✓ Backend dependencies installed")
except ImportError as e:
    print(f"✗ Backend dependency missing: {e}")
    sys.exit(1)

%cd /content/aic-25
# Extras for local extraction & utilities
!pip install --quiet opencv-python tqdm pillow open_clip_torch
# Verify extraction dependencies
try:
    import cv2
    import open_clip
    import torch
    print(f"✓ Extraction dependencies installed (torch device: {'cuda' if torch.cuda.is_available() else 'cpu'})")
except ImportError as e:
    print(f"✗ Extraction dependency missing: {e}")
    sys.exit(1)

In [None]:
# Step 3) Upload AIC_2025_dataset_download_link.csv (or set CSV_PATH)
import os
from google.colab import files
uploaded = files.upload()  # choose AIC_2025_dataset_download_link.csv
# A cancelled or empty upload would otherwise surface as a bare StopIteration
# from next(iter(...)); fail with an actionable message instead.
if not uploaded:
    raise RuntimeError(
        'No file was uploaded. Re-run this cell and choose '
        'AIC_2025_dataset_download_link.csv.'
    )
# Use the first uploaded file name. It is made absolute so that a later `%cd`
# cannot change what it points to.
# NOTE(review): assumes Colab saves uploads into the current working directory — confirm.
CSV_PATH = os.path.abspath(next(iter(uploaded)))
print('Using CSV:', CSV_PATH)


In [None]:
# Step 3) Download dataset assets to example_dataset/ and extract
%cd /content/aic-25
!python tools/aic_cli.py download-dataset --csv $CSV_PATH --outdir example_dataset --extract
!ls -la example_dataset | head -n 50
# Sanity checks
!test -d example_dataset/map-keyframes || echo 'MISSING: example_dataset/map-keyframes'
!test -d example_dataset/clip-features-32 || echo 'MISSING: example_dataset/clip-features-32'
!test -d example_dataset/media-info || echo 'MISSING: example_dataset/media-info'
!test -d example_dataset/keyframes || echo 'MISSING: example_dataset/keyframes'


In [None]:
# Step 4B) Recompute path: smart-sample frames from the videos.
# The whole cell is skipped unless PATH_CHOICE == 'recompute'.
%cd /content/aic-25
if PATH_CHOICE != 'recompute':
    print('Skipping Recompute (PATH_CHOICE!=recompute)')
    # Stops execution of the rest of this cell.
    raise SystemExit(0)
VIDEOS_DIR = 'example_dataset/Videos_L21_a'
# NOTE(review): this is the tag passed to the sampler and to open_clip below; the
# notebook text calls it "SigLIP2" while the tag itself reads "SigLIP" — confirm.
MODEL = 'ViT-L-16-SigLIP-384'
PRETRAINED = 'webli'  # Correct pretrained tag for SigLIP models
# NOTE(review): CLIP_LEN is not referenced anywhere else in the visible notebook.
CLIP_LEN = 1.5
DECODE_FPS = 1.5  # Reduced from 2.0 to save memory
TARGET_FPS = 0.8  # Reduced from 1.0 to save memory
# 4B.1 Smart sampling → extract exact frames (original indices) - with memory optimization
# Frames are written under aic-24-BE/data/video_frames, which step 4B.2 reads.
!python tools/aic_cli.py sample-smart --strategy clip-delta --videos-dir $VIDEOS_DIR \
    --frames-dir aic-24-BE/data/video_frames --decode-fps $DECODE_FPS --target-fps $TARGET_FPS \
    --model $MODEL --pretrained $PRETRAINED

# 4B.2 Encode sampled frames with SigLIP2 and write shards
%cd /content/aic-25/aic-24-BE
import os, numpy as np, torch, pickle, gc
from pathlib import Path
from PIL import Image
import open_clip
from tqdm import tqdm
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Loading {MODEL} with {PRETRAINED} on {device}...")
model, _, preprocess = open_clip.create_model_and_transforms(MODEL, pretrained=PRETRAINED, device=device)
model.eval()
frames_root = 'data/video_frames'
out_dir = 'data/clip_features'
os.makedirs(out_dir, exist_ok=True)

# Check if frames directory exists before proceeding
if not os.path.exists(frames_root):
    raise FileNotFoundError(f"Frames directory not found: {frames_root}. Smart sampling may have failed.")

def encode_batch(img_paths):
    """Encode a batch of image files into L2-normalised feature rows.

    Each path is opened with PIL, converted to RGB and passed through the
    module-level ``preprocess`` transform. The stacked batch is moved to
    ``device`` and encoded with ``model.encode_image`` under ``torch.no_grad``.
    Every row is then divided by its norm along the last dimension.

    Args:
        img_paths: iterable of image file paths.

    Returns:
        A float32 NumPy array on the CPU with one row per input path.
    """
    tensors = [preprocess(Image.open(path).convert('RGB')) for path in img_paths]
    with torch.no_grad():
        embeddings = model.encode_image(torch.stack(tensors).to(device))
        embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
    return embeddings.cpu().float().numpy()

def _frame_index(filename):
    """Return the integer stored in a frame file name's stem ('120.jpg' -> 120)."""
    return int(os.path.splitext(filename)[0])


# Encode every video's frames and write one pickle shard per video:
# (list of frame paths, feature matrix), rows in frame-number order.
for vid in sorted(os.listdir(frames_root)):
    vid_dir = os.path.join(frames_root, vid)
    if not os.path.isdir(vid_dir):
        continue
    frame_names = sorted(
        (name for name in os.listdir(vid_dir) if name.lower().endswith('.jpg')),
        key=_frame_index,
    )
    if not frame_names:
        continue
    # Paths stored in the shard use the fixed './data/video_frames/...' form.
    file_paths = [f'./data/video_frames/{vid}/{name}' for name in frame_names]
    batch_size = 32  # Reduced batch size from 64 to save memory
    chunks = []
    for start in tqdm(range(0, len(frame_names), batch_size), desc=f'Encoding {vid}'):
        window = frame_names[start:start + batch_size]
        chunks.append(encode_batch([os.path.join(vid_dir, name) for name in window]))
        # Force garbage collection to free memory (every 128 frames)
        if start % 128 == 0:
            gc.collect()
            torch.cuda.empty_cache()
    features = np.concatenate(chunks, axis=0)
    shard_path = os.path.join(out_dir, f'{vid}.pkl')
    with open(shard_path, 'wb') as shard:
        pickle.dump((file_paths, features), shard)
    print(f"Encoded {vid}: {features.shape[0]} features")
    
# 4B.3 Build model and patch .env
print("Building NitzcheCLIP model...")
from nitzche_clip import NitzcheCLIP
# Build the model from the feature shard directory and save it under models/.
m = NitzcheCLIP(out_dir)
os.makedirs('models', exist_ok=True)
m.save('models/clip_siglip.pkl')


def _upsert_env_lines(text, updates):
    """Return .env ``text`` with every key in ``updates`` set to KEY="value".

    A line whose stripped form starts with ``KEY=`` is replaced (every
    occurrence). Keys that never appear are appended at the end in the order
    given by ``updates``. All other lines are kept unchanged. The result always
    ends with a newline.
    """
    result = []
    seen = set()
    for line in text.splitlines():
        stripped = line.strip()
        for key, value in updates.items():
            if stripped.startswith(f'{key}='):
                result.append(f'{key}="{value}"')
                seen.add(key)
                break
        else:
            result.append(line)
    result.extend(
        f'{key}="{value}"' for key, value in updates.items() if key not in seen
    )
    return '\n'.join(result) + '\n'


# Point the backend at the new model and set the matching text encoder.
envp = Path('.env')
content = envp.read_text(encoding='utf-8') if envp.exists() else ''
envp.write_text(
    _upsert_env_lines(
        content,
        {
            'MODEL_PATH': './models/',
            'MODEL_16': 'clip_siglip.pkl',
            'CLIP_MODEL_NAME': MODEL,
            'CLIP_PRETRAINED': PRETRAINED,
        },
    ),
    encoding='utf-8',
)
print('✅ Recompute complete. Smart-sampled frames + SigLIP2 features. .env updated.')

In [None]:
# Step 5) Start backend API (daemon)
%cd /content/aic-25
!python tools/aic_cli.py serve --port 8000 --run --daemon --no-reload
!python tools/aic_cli.py serve-status
import time, requests
for _ in range(30):
        																				
    try:
        r = requests.get('http://localhost:8000/docs', timeout=2)
        print('Backend reachable:', r.status_code)
        break
    except Exception:
        time.sleep(1)
else:
    print('Backend not reachable')


In [None]:
# Step 6) Prepare a KIS query
%cd /content/aic-25
query_text = 'Cảnh quay bằng flycam một cây cầu ở TP Hồ Chí Minh, tiếp theo đến cảnh quay tòa nhà Bitexco. Một vài cảnh sau đó chuyển qua quay hình ảnh hồ gươm tại Hà Nội.'  # edit your KIS query here
print('Query:', (query_text[:120] + ('...' if len(query_text) > 120 else '')))


In [None]:
# Step 6) Export KIS CSV to submission/
%cd /content/aic-25
!python tools/aic_cli.py export --text "Cảnh quay bằng flycam một cây cầu ở TP Hồ Chí Minh, tiếp theo đến cảnh quay tòa nhà Bitexco. Một vài cảnh sau đó chuyển qua quay hình ảnh hồ gươm tại Hà Nội." --task kis --name query-1 --api http://localhost:8000 --outdir submission --wait-api 30
!echo 'Generated files:' && ls -la submission
!echo 'Preview:' && head -n 5 submission/query-1-kis.csv


In [None]:
# Step 7) Zip for Codabench
%cd /content/aic-25
!python tools/aic_cli.py zip-submission --outdir submission --name aic25_submission.zip
from google.colab import files as colab_files
colab_files.download('aic25_submission.zip')
