# VideoLingo Unified Cloud Server

Combines WhisperX (ASR) + Demucs (Vocal Separation)

**Steps:**
1. Set GPU: Runtime -> Change runtime type -> GPU
2. Paste your ngrok token into `NGROK_TOKEN` in Step 5 (required)
3. (Optional) Paste a Hugging Face token into `HF_TOKEN` in Step 5 to enable diarization
4. Run All (Step 5 will block and keep running)
5. To stop the server, interrupt the Step 5 cell (Ctrl+C, or the notebook's Stop / Interrupt button)

In [None]:
# Step 1: Install Mamba and create environment
import os, sys

ENV_PATH = '/content/conda-envs/videolingo' if 'google.colab' in sys.modules else '/kaggle/working/conda-envs/videolingo' if os.path.exists('/kaggle') else os.path.expanduser('~/conda-envs/videolingo')

print("Installing Mamba...")
if not os.path.exists(os.path.expanduser('~/miniforge3/bin/mamba')):
    !wget -q -O /tmp/miniforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh
    !bash /tmp/miniforge.sh -b -p ~/miniforge3

MAMBA = os.path.expanduser('~/miniforge3/bin/mamba')

print(f"Creating env: {ENV_PATH}")
if os.path.exists(ENV_PATH):
    !{MAMBA} remove -p {ENV_PATH} --all -y -q

!{MAMBA} create -p {ENV_PATH} -c conda-forge -y python=3.10 ffmpeg git pip

import json
with open('.conda_python_path', 'w') as f:
    json.dump({'python_path': f'{ENV_PATH}/bin/python', 'env_path': ENV_PATH}, f)

print("‚úÖ Env created!")

In [None]:
# Step 2: Install dependencies
import json, os, sys

with open('.conda_python_path', 'r') as f:
    cfg = json.load(f)
    PYTHON = cfg['python_path']

print(f"Python: {PYTHON}")
!{PYTHON} --version

print("\nInstalling...")
!{PYTHON} -m pip install --no-input torch==2.0.0 torchaudio==2.0.0 --index-url https://download.pytorch.org/whl/cu118
!{PYTHON} -m pip install --no-input whisperx demucs fastapi uvicorn python-multipart pyngrok librosa soundfile
!{PYTHON} -m pyngrok install

print("\n‚úÖ Done!")

In [None]:
# Step 3: Verify
import json

with open('.conda_python_path', 'r') as f:
    PYTHON = json.load(f)['python_path']

print(f"Python: {PYTHON}")
!{PYTHON} --version

for pkg in ['torch', 'whisperx', 'demucs', 'fastapi', 'librosa']:
    result = !{PYTHON} -c "import {pkg}; print('OK')" 2>&1
    print(f"  {'‚úÖ' if result and result[0] == 'OK' else '‚ùå'} {pkg}")

!{PYTHON} -c "import torch; print('GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU')"

In [None]:
# Step 4: Download server files
#
# Fetches the server sources from the VideoLingo repository into the current
# directory and prints the version string found in `_version.py`.
import urllib.request, os, time, re

BASE_URL = "https://raw.githubusercontent.com/infinite-gaming-studio/VideoLingo/feature/speaker-diarization-tts/videolingo_cloud"
files_to_download = ['_version.py', 'unified_server.py']

# Download all required files
for filename in files_to_download:
    # The timestamp query parameter makes every request URL unique
    # (presumably to avoid being served a cached copy).
    url = f"{BASE_URL}/{filename}?t={int(time.time())}"
    # Download to a temporary name and swap it in only on success, so a failed
    # or interrupted download never leaves a missing or truncated server file.
    partial_name = f"{filename}.part"
    print(f"Downloading: {filename}")
    try:
        urllib.request.urlretrieve(url, partial_name)
    except Exception:
        if os.path.exists(partial_name):
            os.remove(partial_name)
        raise
    os.replace(partial_name, filename)

# Extract and print version from _version.py
with open('_version.py', 'r', encoding='utf-8') as f:
    content = f.read()

# Accept either quote style and flexible spacing around '='.
version_match = re.search(r'__version__\s*=\s*["\'](.*?)["\']', content)
version = version_match.group(1) if version_match else "unknown"
print(f"\n✅ Downloaded server files v{version}")
print("   - _version.py (version management)")
print("   - unified_server.py (main server)")

In [None]:
# Step 5: Run server (BLOCKING - Ctrl+C to stop)
NGROK_TOKEN = ""
SERVER_TOKEN = ""  
HF_TOKEN = ""  

if not NGROK_TOKEN:
    raise ValueError('Paste ngrok token above')

import os, json
with open('.conda_python_path', 'r') as f:
    cfg = json.load(f)
    PYTHON = cfg['python_path']
    ENV_PATH = cfg['env_path']

os.environ['NGROK_TOKEN'] = NGROK_TOKEN

# HUGGING FACE TOKEN (For Diarization)
if HF_TOKEN:
    os.environ['HF_TOKEN'] = HF_TOKEN
    print("üîë HF_TOKEN set")

# ËÆæÁΩÆÊúçÂä°Âô®ËÆøÈóÆ‰ª§ÁâåÔºàÂèØÈÄâÔºâ
if SERVER_TOKEN:
    os.environ['WHISPER_SERVER_TOKEN'] = SERVER_TOKEN
    print("üîí Token authentication enabled")
else:
    print("‚ö†Ô∏è Warning: No SERVER_TOKEN set, server will accept all requests")

# Add conda site-packages to path for direct execution if needed
os.environ['PYTHONPATH'] = f"{ENV_PATH}/lib/python3.10/site-packages:" + os.environ.get('PYTHONPATH', '')

print(f"Starting server with {PYTHON}...")
!{PYTHON} unified_server.py