In [None]:
import os
import shutil
import sys

# 1. CLEANUP (Wipe previous failed attempts)
if os.path.exists("VideoLLaMA2"):
    print("üßπ Cleaning up old files...")
    shutil.rmtree("VideoLLaMA2")

# 2. CLONE
print("‚¨áÔ∏è Cloning repository...")
!git clone https://github.com/DAMO-NLP-SG/VideoLLaMA2.git
%cd VideoLLaMA2
!git checkout audio_visual

# 3. THE FIX: Remove ALL version constraints
# We tell the AI: "Use whatever versions Colab already has."
print("üõ†Ô∏è Patching for Colab 2025...")

# Remove Numpy & Torch constraints entirely (Fixes the binary crash)
!sed -i '/numpy/d' pyproject.toml
!sed -i '/numpy/d' requirements.txt
!sed -i '/torch/d' requirements.txt
!sed -i '/torchvision/d' requirements.txt

# Remove Flash Attention (Incompatible with T4)
!sed -i '/flash-attn/d' pyproject.toml
!sed -i '/flash-attn/d' requirements.txt

# 4. COMPATIBILITY PATCH (Monkey Patch)
# Numpy 2.0 removed 'np.float'. We add it back so old code doesn't break.
import numpy as np
if not hasattr(np, 'float'):
    np.float = float
    print("ü©π Applied Numpy 2.0 Patch")

# 5. INSTALL
print("üì¶ Installing dependencies (Keep existing versions)...")
# We use --no-deps for the library itself to prevent it from downgrading anything
!pip install -r requirements.txt --quiet
!pip install ffmpeg-python bitsandbytes accelerate deepspeed --quiet
!pip install -e . --no-build-isolation --no-deps

# 6. VERIFY
%cd ..
sys.path.append("/content/VideoLLaMA2") 
try:
    from videollama2 import model_init, mm_infer
    print("\n‚úÖ SUCCESS: Setup Verified. The crash is fixed.")
except ImportError as e:
    print(f"\n‚ùå ERROR: {e}")
except ValueError as e:
    print(f"\n‚ùå VERSION ERROR: {e}")
    print("Did you forget to 'Restart Session' before running this?")

üßπ Cleaning up old files...
‚¨áÔ∏è Cloning repository...
Cloning into 'VideoLLaMA2'...
remote: Enumerating objects: 986, done.[K
remote: Counting objects: 100% (427/427), done.[K
remote: Compressing objects: 100% (182/182), done.[K
remote: Total 986 (delta 361), reused 245 (delta 245), pack-reused 559 (from 2)[K
Receiving objects: 100% (986/986), 55.81 MiB | 40.22 MiB/s, done.
Resolving deltas: 100% (661/661), done.
/content/VideoLLaMA2
Branch 'audio_visual' set up to track remote branch 'audio_visual' from 'origin'.
Switched to a new branch 'audio_visual'
üõ†Ô∏è Patching for Colab 2025...
üì¶ Installing dependencies (Keep existing versions)...
Obtaining file:///content/VideoLLaMA2
  Checking if build backend supports build_editable ... [?25l[?25hdone
  Preparing editable metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: videollama2
  Building editable for videollama2 (pyproject.toml) ... [?25l[?25hdone
  Created wheel for videollama2: f

/content

‚úÖ SUCCESS: Setup Verified. The crash is fixed.


In [4]:
import torch
import gc
from videollama2 import model_init, mm_infer
from videollama2.utils import disable_torch_init

# 1. Clear RAM before loading
gc.collect()
torch.cuda.empty_cache()

# 2. Define Path
model_path = "DAMO-NLP-SG/VideoLLaMA2.1-7B-AV"
print("‚è≥ Downloading Model (4-bit Mode)...")

disable_torch_init()

# 3. Load Model
# I removed 'low_cpu_mem_usage=True' because the library adds it automatically.
model, processor, tokenizer = model_init(
    model_path, 
    load_in_4bit=True,  # This keeps it small (approx 5GB)
    device_map="auto"
)

print("‚úÖ Model Loaded & Ready!")

‚è≥ Downloading Model (4-bit Mode)...


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


RuntimeError: No GPU found. A GPU is needed for quantization.