# VideoLingo Unified Cloud Server

Combines WhisperX (ASR) + Demucs (Vocal Separation)

**Steps:**
1. Set GPU: Runtime -> Change runtime type -> GPU
2. Paste ngrok token in Step 5
3. Run All (Step 5 will block and keep running)
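4. To stop, interrupt the Step 5 cell (the cell's stop button, or Runtime -> Interrupt execution; this is the notebook equivalent of pressing Ctrl+C)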

In [None]:
# Step 1: Install Mamba and create environment
import os, sys, subprocess

ENV_PATH = '/content/conda-envs/videolingo' if 'google.colab' in sys.modules else '/kaggle/working/conda-envs/videolingo' if os.path.exists('/kaggle') else os.path.expanduser('~/conda-envs/videolingo')

print("Installing Mamba...")
if not os.path.exists(os.path.expanduser('~/miniforge3/bin/mamba')):
    !wget -q -O /tmp/miniforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh
    !bash /tmp/miniforge.sh -b -p ~/miniforge3

MAMBA = os.path.expanduser('~/miniforge3/bin/mamba')

print(f"Creating env: {ENV_PATH}")
if os.path.exists(ENV_PATH):
    !{MAMBA} remove -p {ENV_PATH} --all -y -q 2>/dev/null || true

!{MAMBA} create -p {ENV_PATH} -c conda-forge -y python=3.10 ffmpeg git pip
print("Env created!")

In [None]:
# Step 2: Install all dependencies
PYTHON = f"{ENV_PATH}/bin/python"

print("Installing dependencies...")
print("This may take 5-10 minutes...")

# Install PyTorch with CUDA support
!{PYTHON} -m pip install torch==2.0.0 torchaudio==2.0.0 --index-url https://download.pytorch.org/whl/cu118

# Install WhisperX and Demucs
!{PYTHON} -m pip install whisperx demucs

# Install API dependencies
!{PYTHON} -m pip install fastapi uvicorn python-multipart pyngrok requests

# Install ngrok binary
!{PYTHON} -m pyngrok install

print("\n‚úÖ All dependencies installed!")

In [None]:
# Step 3: Verify installation
# Each package is imported with the environment's interpreter (PYTHON), since
# that is where Step 2 installed it; a non-zero exit code marks a failure.
print("Verifying...")
for pkg in ['torch', 'whisperx', 'demucs', 'fastapi']:
    r = subprocess.run([PYTHON, '-c', f'import {pkg}; print("OK")'], capture_output=True)
    print(f"  {'✅' if r.returncode == 0 else '❌'} {pkg}")

import json
# Record the interpreter path and env prefix in a small JSON file in the
# current working directory.
with open('.conda_python_path', 'w') as f:
    json.dump({'python_path': PYTHON, 'env_prefix': ENV_PATH}, f)

# Check GPU
# Run the probe through subprocess with an argument list instead of a `!`
# shell line. In the shell form the inner double quotes ended the quoted
# argument early (so the CPU branch referenced an undefined name), and IPython
# also tries to expand the braces of the embedded f-string in the kernel
# namespace, which leaves {PYTHON} unexpanded when that expansion fails.
gpu_probe = (
    "import torch; "
    "print('GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU')"
)
gpu_check = subprocess.run([PYTHON, '-c', gpu_probe], capture_output=True, text=True)
# Show the probe's answer, or its error output when the probe itself failed.
print(gpu_check.stdout.strip() or gpu_check.stderr.strip())

In [None]:
# Step 4: Download unified server
import os
import time
import urllib.request

# Add timestamp to bypass GitHub cache
timestamp = int(time.time())
url = f"https://raw.githubusercontent.com/infinite-gaming-studio/VideoLingo/main/whisperx_cloud/unified_server.py?t={timestamp}"

# Always delete old file first
if os.path.exists('unified_server.py'):
    os.remove('unified_server.py')
    print("Removed old server file")

# Download new server file
urllib.request.urlretrieve(url, 'unified_server.py')
print("Downloaded unified server")

# Verify version
with open('unified_server.py', 'r', encoding='utf-8') as server_file:
    content = server_file.read()

# The first non-comment line that mentions SERVER_VERSION and contains '=' is
# treated as the version assignment; nothing is printed when no line matches.
version_line = next(
    (
        candidate
        for candidate in content.split('\n')
        if 'SERVER_VERSION' in candidate
        and '=' in candidate
        and not candidate.strip().startswith('#')
    ),
    None,
)
if version_line is not None:
    # Take the text after the first '=' and drop both kinds of quote marks.
    raw_value = version_line.split('=')[1].strip()
    version = raw_value.replace('"', "").replace("'", '')
    print(f"Server version: {version}")

In [None]:
# Step 5: Start server with ngrok (BLOCKING - keeps running)
NGROK_TOKEN = ""  # <-- PASTE YOUR NGROK TOKEN HERE

# Fail early with instructions instead of letting ngrok reject an empty token.
if not NGROK_TOKEN:
    raise ValueError("Please paste your ngrok token above! Get it from https://dashboard.ngrok.com/get-started/your-authtoken")

import subprocess, threading, sys, time, os
# pyngrok was installed into the conda env (Step 2), not into the notebook
# kernel, so the env's site-packages is put on the kernel's import path.
sys.path.insert(0, f"{ENV_PATH}/lib/python3.10/site-packages")
from pyngrok import ngrok, conf

# Set token and connect
conf.get_default().auth_token = NGROK_TOKEN
tunnel = ngrok.connect(8000, "http")
# Recent pyngrok releases return an NgrokTunnel object whose str() is a
# description of the tunnel rather than the bare URL; older releases returned
# the URL string itself. Keep only the URL so the printed config is copy-pasteable.
public_url = getattr(tunnel, "public_url", tunnel)
print(f"🌐 Public URL: {public_url}")
print(f"\n📋 Copy to VideoLingo config.yaml:")
print(f"   whisper:")
print(f"     runtime: 'cloud'")
print(f"     whisperX_cloud_url: '{public_url}'")
print(f"   demucs: 'cloud'")
print(f"\n🚀 Starting server... (Press Ctrl+C to stop)\n")

# Start server with real-time log streaming
# `-u` disables the child's output buffering; without it Python block-buffers
# stdout when writing to a pipe and log lines arrive late.
proc = subprocess.Popen(
    [PYTHON, '-u', 'unified_server.py', '--port', '8000'],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True,
    bufsize=1
)

def stream_output(pipe, prefix):
    """Echo every line read from *pipe*, tagged with ``[prefix]``.

    Lines are read with ``pipe.readline()`` until it returns an empty
    string, then the pipe is closed. Each line is printed without an extra
    newline because it already carries its own line ending.
    """
    while True:
        text = pipe.readline()
        if text == '':
            # An empty string from readline() marks end of stream.
            break
        print(f"[{prefix}] {text}", end='')
    pipe.close()

# Start log streaming threads
# Daemon threads so they never keep the kernel alive after the cell ends.
threading.Thread(target=stream_output, args=(proc.stdout, "SERVER"), daemon=True).start()
threading.Thread(target=stream_output, args=(proc.stderr, "ERROR"), daemon=True).start()

# ngrok.disconnect() and the endpoint list need the plain URL string; accept
# either a URL string or a tunnel object exposing `.public_url`.
tunnel_url = getattr(public_url, "public_url", public_url)

try:
    # Wait for server to start
    time.sleep(5)

    # Only announce the server when the process is still alive after the wait.
    if proc.poll() is None:
        print("\n✅ Server is running!")
        print("\n📡 Available endpoints:")
        print(f"   Health:  {tunnel_url}/")
        print(f"   ASR:     {tunnel_url}/asr/transcribe")
        print(f"   Separate:{tunnel_url}/separation/separate")
        print("\n💡 Press Ctrl+C to stop\n")

        # Keep running until interrupted
        while proc.poll() is None:
            time.sleep(1)

    # Reaching this point without an interrupt means the server exited by itself.
    print(f"\n❌ Server exited with code {proc.returncode}")
except KeyboardInterrupt:
    print("\n\n🛑 Shutting down...")
finally:
    # Cleanup runs on every exit path so neither the server process nor the
    # tunnel is left behind.
    if proc.poll() is None:
        proc.terminate()
        try:
            proc.wait(timeout=5)
        except subprocess.TimeoutExpired:
            # The server ignored the terminate request; force it down.
            proc.kill()
            proc.wait()
    ngrok.disconnect(tunnel_url)
    print("✅ Stopped")