# AIC-25 Colab: Textual KIS (CLI flow)

This notebook clones the repo, downloads dataset assets from a CSV of links, wires the dataset into the backend, starts the API, and exports KIS CSVs using the CLI tools.

Note: TRAKE requires Sonic + transcripts/heading JSON ingestion and is not covered here.

In [None]:
# Clone the repo into /content/aic-25
# Always starts from a fresh checkout: any existing TARGET_DIR is removed first.
import os, shutil, subprocess
REPO_URL = "https://github.com/dnlqvu/hcm-AI-challenge-2024-main.git"
TARGET_DIR = "/content/aic-25"

# Later cells `%cd` into TARGET_DIR. If this cell is re-run from there, the
# rmtree below would delete the kernel's own working directory and the git
# child process would inherit a directory that no longer exists. Step out to
# the parent directory before touching the checkout.
parent_dir = os.path.dirname(TARGET_DIR)
os.makedirs(parent_dir, exist_ok=True)
os.chdir(parent_dir)

if os.path.exists(TARGET_DIR):
    shutil.rmtree(TARGET_DIR)
print('Cloning', REPO_URL, '->', TARGET_DIR)
# check=True turns a failed clone into CalledProcessError so the cell stops here.
subprocess.run(['git', 'clone', REPO_URL, TARGET_DIR], check=True)

# List top-level files to confirm
print(os.listdir(TARGET_DIR))


In [None]:
# Install backend dependencies
%cd /content/aic-25/aic-24-BE
!python -m pip install --upgrade pip
!pip install -r requirements.txt


In [None]:
# Upload AIC_2025_dataset_download_link.csv (or edit CSV_PATH to an accessible path)
from google.colab import files
uploaded = files.upload()  # choose AIC_2025_dataset_download_link.csv
# An empty result (e.g. the upload dialog was cancelled) would otherwise surface
# as a bare StopIteration from next(); fail with an actionable message instead.
if not uploaded:
    raise RuntimeError(
        'No file was uploaded. Re-run this cell and choose '
        'AIC_2025_dataset_download_link.csv, or set CSV_PATH manually.'
    )
# Use the first uploaded file name as the CSV path.
CSV_PATH = next(iter(uploaded))
print('Using CSV:', CSV_PATH)


In [None]:
# Download dataset assets to example_dataset/ and extract
%cd /content/aic-25
!python tools/aic_cli.py download-dataset --csv $CSV_PATH --outdir example_dataset --extract
!ls -la example_dataset | head -n 50
# Sanity checks for required subfolders (maps, features, media-info, keyframes)
!test -d example_dataset/map-keyframes || echo 'MISSING: example_dataset/map-keyframes (run download with --extract or upload and extract map-keyframes zip)'
!test -d example_dataset/clip-features-32 || echo 'MISSING: example_dataset/clip-features-32 (run download with --extract or upload and extract features zip)'
!test -d example_dataset/media-info || echo 'MISSING: example_dataset/media-info (run download with --extract or upload and extract media-info zip)'
!test -d example_dataset/keyframes || echo 'MISSING: example_dataset/keyframes (run download with --extract or upload and extract keyframes zip)'


## Recommended: Recompute Features (SigLIP2)

This path re-encodes frames with a stronger text-aligned model (SigLIP2) and rebuilds the backend model.
- Uses Lighthouse decoding if available (`vendor/lighthouse-main`), otherwise falls back to OpenCV midpoints.
- Skips the older precomputed features; you don't need to run `setup-example`.


In [None]:
# Switch to the repository root before installing.
%cd /content/aic-25
# Minimal deps for local extraction (open_clip is already in backend requirements)
# Installs or upgrades OpenCV, tqdm and Pillow quietly; versions are not pinned.
!python -m pip install --quiet --upgrade opencv-python tqdm pillow


In [None]:
%cd /content/aic-25
VIDEOS_DIR = 'example_dataset/Videos_L21_a'
MODEL = 'ViT-L-16-SigLIP-384'  # SigLIP2-like (adjust as desired)
PRETRAINED = 'hf-hub:google/siglip-so400m-patch14-384'  # pretrained tag in open_clip
CLIP_LEN = 1.5
OUTDIR = 'hero_colab_out/clip-vit_features'
# Prefer Lighthouse decoding when present
import os
BACKEND = 'lighthouse-clip' if os.path.isdir('vendor/lighthouse-main') else 'clip'
print('Using backend:', BACKEND)

# 1) Extract features
!python tools/aic_cli.py clip-extract-colab --videos-dir $VIDEOS_DIR --outdir $OUTDIR --clip-len $CLIP_LEN --backend $BACKEND --model $MODEL --pretrained $PRETRAINED

# 2) Convert features to shards + emit frame list
!python tools/convert_hero_clip_to_shards.py --hero-clip-dir $OUTDIR --media-info aic-24-BE/data/media-info --clip-len $CLIP_LEN --outdir aic-24-BE/data/clip_features --emit-frame-list selected_frames_from_clip.csv

# 3) Extract exact frames for those indices
!python aic-24-BE/data_processing/crop_frame.py --input-dir $VIDEOS_DIR --output-dir aic-24-BE/data/video_frames --frame-list selected_frames_from_clip.csv

# 4) Build model and patch .env with text encoder config
!python tools/aic_cli.py build-model-from-shards --be-dir aic-24-BE --shards-dir aic-24-BE/data/clip_features --model-name clip_siglip.pkl

# Patch .env to use matching text tower
from pathlib import Path
envp = Path('aic-24-BE/.env')
content = envp.read_text(encoding='utf-8') if envp.exists() else ''
lines = []
saw_name = False; saw_pre = False
for line in content.splitlines():
    if line.strip().startswith('CLIP_MODEL_NAME='):
        lines.append(f'CLIP_MODEL_NAME="{MODEL}"'); saw_name=True
    elif line.strip().startswith('CLIP_PRETRAINED='):
        lines.append(f'CLIP_PRETRAINED="{PRETRAINED}"'); saw_pre=True
    else:
        lines.append(line)
if not saw_name: lines.append(f'CLIP_MODEL_NAME="{MODEL}"')
if not saw_pre: lines.append(f'CLIP_PRETRAINED="{PRETRAINED}"')
envp.write_text('
'.join(lines)+'
', encoding='utf-8')
print('Recompute complete. .env updated with CLIP text model config.')


## Optional: Smart Sampling (AI-driven)

Use the smart sampler to select semantically novel frames (CLIP delta) or shot-aware frames, then extract only those.
This replaces the provided keyframes; if you use this path, skip the later `setup-example` cell ("Wire dataset into backend") further down.


In [None]:
# Switch to the repository root before installing.
%cd /content/aic-25
# Install extras for smart sampling (opencv; transnetv2 optional)
# The OpenCV version is not pinned; --upgrade pulls the latest release.
!python -m pip install --quiet --upgrade opencv-python
# Optional (for shot-aware): !pip install --quiet transnetv2


In [None]:
# Run the smart sampler over the raw videos and write the selected frames into
# the backend's frame folder (aic-24-BE/data/video_frames).
%cd /content/aic-25
VIDEOS_DIR = 'example_dataset/Videos_L21_a'  # set to your raw videos folder
# Strategy options: 'clip-delta' (semantic change) or 'shots' (shot-aware)
STRATEGY = 'clip-delta'
DECODE_FPS = 2.0   # analysis decode rate for clip-delta
TARGET_FPS = 1.0   # target kept frames/sec for clip-delta
SHOT_DECODE_FPS = 10.0  # for shot-aware
SHOT_LONG_SEC = 4.0     # long shot threshold (more samples)

# Any STRATEGY value other than 'clip-delta' falls through to the 'shots' command.
if STRATEGY == 'clip-delta':
    !python tools/aic_cli.py sample-smart --strategy clip-delta --videos-dir $VIDEOS_DIR \
        --frames-dir aic-24-BE/data/video_frames --decode-fps $DECODE_FPS --target-fps $TARGET_FPS
else:
    !python tools/aic_cli.py sample-smart --strategy shots --videos-dir $VIDEOS_DIR \
        --frames-dir aic-24-BE/data/video_frames --shot-decode-fps $SHOT_DECODE_FPS --shot-long-sec $SHOT_LONG_SEC

# Show the first entries of the frames folder as a quick visual check.
!ls -la aic-24-BE/data/video_frames | head -n 20


In [None]:
# Optional: Rebuild CLIP feature shards and model from smart-sampled frames (ViT-B/32)
# Reads frames from data/video_frames (the folder the smart-sampling cell writes
# to) and writes per-video feature pickles into data/clip_features.
%cd /content/aic-25/aic-24-BE
import os, numpy as np, torch, pickle
from PIL import Image
import open_clip
from tqdm import tqdm

# Use the GPU when the runtime has one; otherwise encode on CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Only the model and the third returned transform are kept; the second return
# value is discarded.
model, _, preprocess = open_clip.create_model_and_transforms('ViT-B-32', pretrained='laion2b_s34b_b79k', device=device)
model.eval()
# Paths are relative to aic-24-BE (see the %cd above).
frames_root = 'data/video_frames'
out_dir = 'data/clip_features'
os.makedirs(out_dir, exist_ok=True)

def encode_batch(img_paths):
    """Encode a batch of image files into L2-normalised feature vectors.

    Each path is opened, converted to RGB and run through the notebook-level
    ``preprocess`` transform; the stacked batch is moved to ``device`` and
    passed to ``model.encode_image`` with gradients disabled.

    Args:
        img_paths: iterable of image file paths (must be non-empty, since the
            tensors are stacked).

    Returns:
        A float32 NumPy array with one row per input path, each row divided by
        its norm along the last dimension.
    """
    tensors = [preprocess(Image.open(path).convert('RGB')) for path in img_paths]
    with torch.no_grad():
        embeddings = model.encode_image(torch.stack(tensors).to(device))
        unit_embeddings = embeddings / embeddings.norm(dim=-1, keepdim=True)
    return unit_embeddings.cpu().float().numpy()

# Walk every per-video frame folder, encode its JPEG frames in batches of 64,
# and write one pickle per video holding (frame paths, feature matrix).
for vid in sorted(os.listdir(frames_root)):
    vid_dir = os.path.join(frames_root, vid)
    if not os.path.isdir(vid_dir):
        continue
    frame_names = [entry for entry in os.listdir(vid_dir) if entry.lower().endswith('.jpg')]
    if not frame_names:
        continue
    # The sort key parses each file stem as an integer, giving numeric order.
    frame_names.sort(key=lambda entry: int(os.path.splitext(entry)[0]))
    # Paths stored in the shard use the fixed './data/video_frames/' prefix.
    file_paths = [f'./data/video_frames/{vid}/{name}' for name in frame_names]
    batch_size = 64
    chunk_feats = []
    for start in tqdm(range(0, len(frame_names), batch_size), desc=f'Encoding {vid}'):
        chunk = frame_names[start:start + batch_size]
        chunk_feats.append(encode_batch([os.path.join(vid_dir, name) for name in chunk]))
    feats_np = np.concatenate(chunk_feats, axis=0)
    shard_path = os.path.join(out_dir, f'{vid}.pkl')
    with open(shard_path, 'wb') as shard_file:
        pickle.dump((file_paths, feats_np), shard_file)

# Build and save model pickle
from nitzche_clip import NitzcheCLIP
m = NitzcheCLIP(out_dir)
os.makedirs('models', exist_ok=True)
m.save('models/clip_vit_b32_nitzche.pkl')

# Patch .env: replace existing MODEL_PATH / MODEL_16 assignments in place,
# append whichever is missing, and keep every other line untouched.
env_path = '.env'
content = ''
if os.path.exists(env_path):
    # Context manager so the handle is closed deterministically.
    with open(env_path, 'r', encoding='utf-8') as env_in:
        content = env_in.read()
env_overrides = {
    'MODEL_PATH': './models/',
    'MODEL_16': 'clip_vit_b32_nitzche.pkl',
}
not_yet_seen = list(env_overrides)  # keys still to be appended, in declaration order
lines = []
for line in content.splitlines():
    stripped = line.strip()
    for key, value in env_overrides.items():
        if stripped.startswith(f'{key}='):
            lines.append(f'{key}="{value}"')
            if key in not_yet_seen:
                not_yet_seen.remove(key)
            break
    else:
        # Not one of the managed keys: keep the line exactly as it was.
        lines.append(line)
lines.extend(f'{key}="{env_overrides[key]}"' for key in not_yet_seen)
# Join with real newline escapes and terminate the file with a newline.
with open(env_path, 'w', encoding='utf-8') as env_out:
    env_out.write('\n'.join(lines) + '\n')
print('Model built and .env updated.')


In [None]:
# Wire dataset into backend: copy, rename keyframes, pack features, build model
%cd /content/aic-25
# Guard: ensure required folders exist before setup (run the previous cell first)
!test -d example_dataset/map-keyframes || (echo 'ERROR: Missing example_dataset/map-keyframes. Run the download cell first with --extract.' && false)
!test -d example_dataset/clip-features-32 || (echo 'ERROR: Missing example_dataset/clip-features-32. Run the download cell first with --extract.' && false)
!test -d example_dataset/media-info || (echo 'ERROR: Missing example_dataset/media-info. Run the download cell first with --extract.' && false)
!test -d example_dataset/keyframes || (echo 'ERROR: Missing example_dataset/keyframes. Run the download cell first with --extract.' && false)
!python tools/aic_cli.py setup-example --example-dir example_dataset
!ls -la aic-24-BE/data/video_frames | head -n 20
!ls -la aic-24-BE/data/clip_features | head -n 20
!cat aic-24-BE/.env


In [None]:
# Start backend API in the background on port 8000
%cd /content/aic-25
# Use daemon mode so the notebook remains responsive
!python tools/aic_cli.py serve --port 8000 --run --daemon --no-reload
!python tools/aic_cli.py serve-status
import time, requests
for _ in range(30):
    try:
        r = requests.get('http://localhost:8000/docs', timeout=2)
        print('Backend reachable:', r.status_code)
        break
    except Exception:
        time.sleep(1)
else:
    print('Backend not reachable')


In [None]:
# Create a demo KIS query (inline; no files needed)
%cd /content/aic-25
query_text = 'tin tức thời sự'  # edit your KIS query here
print('Query:', (query_text[:120] + ('...' if len(query_text) > 120 else '')))


In [None]:
# Export KIS CSVs to submission/ (inline query)
%cd /content/aic-25
# "$query_text" is filled in from the Python variable set in the query cell; the
# double quotes keep a multi-word query as a single --text argument.
!python tools/aic_cli.py export --text "$query_text" --task kis --name query-1 --api http://localhost:8000 --outdir submission --wait-api 30
# List the output folder, then show the first 5 lines of the exported CSV.
!echo 'Generated files:' && ls -la submission
!echo 'Preview:' && head -n 5 submission/query-1-kis.csv


In [None]:
# (Optional) Zip for Codabench and download
%cd /content/aic-25
!python tools/aic_cli.py zip-submission --outdir submission --name aic25_submission.zip
# Imported under an alias so it does not rebind the `files` name from the upload cell.
from google.colab import files as colab_files
# NOTE(review): assumes zip-submission writes aic25_submission.zip into the
# current directory (/content/aic-25) — TODO confirm against the CLI.
colab_files.download('aic25_submission.zip')
