# Vast.ai Project Setup

**Quick setup for FSLR preprocessing on Vast.ai**

## Folder Structure:
```
/workspace/
├── fslr-transformer-vs-iv3gru/     # Project code
├── data/
│   ├── raw/                        # Upload videos here
│   └── processed/                  # Output here
```


In [None]:
import subprocess
import sys
import os
from pathlib import Path

def run_command(cmd, description="", retries=3, timeout=300):
    """Run an external command, retrying on failure or timeout.

    Args:
        cmd: Argument list handed to ``subprocess.run`` (no shell is involved).
        description: Human-readable label used in the progress messages.
        retries: Maximum number of attempts; with a value below 1 nothing runs.
        timeout: Per-attempt time limit in seconds.

    Returns:
        True as soon as one attempt exits with status 0. False when every
        attempt fails or times out, or when the command cannot be started
        at all (missing executable, permission error, ...).
    """
    for attempt in range(retries):
        print(f"{description}... (attempt {attempt + 1}/{retries})")
        is_last_attempt = attempt == retries - 1
        try:
            subprocess.run(cmd, check=True, capture_output=True, text=True, timeout=timeout)
        except subprocess.TimeoutExpired:
            print(f"⏰ {description} timed out after {timeout}s")
            if is_last_attempt:
                print(f"❌ {description} failed after {retries} attempts due to timeout")
                return False
        except subprocess.CalledProcessError as e:
            print(f"❌ {description} failed (attempt {attempt + 1}/{retries}):")
            # str() every part: a Path inside cmd must not raise TypeError here
            # and hide the failure that is being reported.
            print(f"Command: {' '.join(map(str, cmd))}")
            print(f"Error: {e.stderr}")
            if is_last_attempt:
                return False
        except FileNotFoundError as e:
            # Retrying cannot make a missing executable appear.
            print(f"❌ Command not found: {e}")
            return False
        except OSError as e:
            # e.g. PermissionError on a non-executable file; keep the
            # True/False contract instead of letting the exception escape.
            print(f"❌ Could not run command: {e}")
            return False
        else:
            print(f"✅ {description} completed")
            return True
        # Only reached after a failed attempt that still has retries left.
        print(f"🔄 Retrying {description}...")
    return False

# Reuse the virtual environment if it is already there, otherwise create it
venv_path = Path("/workspace/venv")
if venv_path.exists():
    print("✅ Virtual environment already exists")
else:
    venv_created = run_command(
        [sys.executable, "-m", "venv", str(venv_path)],
        "Creating virtual environment",
    )
    if not venv_created:
        print("❌ Failed to create virtual environment. Exiting.")
        sys.exit(1)

# Windows venvs keep their executables in Scripts/ with an .exe suffix;
# every other platform (including the Linux hosts on Vast.ai) uses bin/
if os.name == 'nt':
    venv_bin_dir, exe_suffix = venv_path / "Scripts", ".exe"
else:
    venv_bin_dir, exe_suffix = venv_path / "bin", ""
python_exe = venv_bin_dir / f"python{exe_suffix}"
pip_exe = venv_bin_dir / f"pip{exe_suffix}"

# A venv directory without its interpreter is unusable, so stop early
if not python_exe.exists():
    print(f"❌ Python executable not found at {python_exe}")
    sys.exit(1)

print(f"🐍 Using Python: {python_exe}")

# Get the project sources: pull when a checkout is present, clone otherwise
project_root = Path("/workspace/fslr-transformer-vs-iv3gru")
if project_root.exists():
    repo_updated = run_command(["git", "-C", str(project_root), "pull"], "Updating repository")
    if not repo_updated:
        # A failed pull is not fatal: the existing checkout is still usable
        print("⚠️ Failed to update repository, continuing with existing code")
else:
    clone_cmd = [
        "git", "clone",
        "https://github.com/jeipab/fslr-transformer-vs-iv3gru.git",
        str(project_root),
    ]
    if not run_command(clone_cmd, "Cloning repository"):
        print("❌ Failed to clone repository. Exiting.")
        sys.exit(1)

# Put the project first on the import path of this kernel
sys.path.insert(0, str(project_root))

# The dependency list lives in the repository; without it nothing can be installed
requirements_file = project_root / "requirements.txt"
if not requirements_file.exists():
    print(f"❌ Requirements file not found at {requirements_file}")
    sys.exit(1)

# Install dependencies with extended timeout and retry logic.
# pip's --timeout is its network socket timeout and --retries its per-connection
# retry count; the overall limit for each attempt is the `timeout` given to
# run_command.
pip_cmd = [
    str(pip_exe), "install",
    "--timeout", "600",
    "--retries", "5",
    "--no-cache-dir",    # Avoid cache issues
    "-r", str(requirements_file),
]

if not run_command(pip_cmd, "Installing dependencies", retries=2, timeout=600):
    print("❌ Failed to install dependencies after retries.")
    print("🔄 Trying alternative installation method...")

    # Fallback 1: install the core packages one at a time so that one slow
    # download cannot take the whole batch down with it
    packages = ["torch", "torchvision", "numpy", "opencv-python", "mediapipe", "tqdm", "pyarrow"]
    for package in packages:
        print(f"Installing {package} individually...")
        if run_command([str(pip_exe), "install", "--timeout", "300", package],
                       f"Installing {package}", retries=2, timeout=300):
            print(f"✅ {package} installed successfully")
        else:
            print(f"⚠️ {package} installation failed, continuing...")

    # Fallback 2: install whatever requirements.txt still needs.
    # No --ignore-installed here: that flag makes pip reinstall every listed
    # package, including the ones fallback 1 just installed. Without it pip
    # skips requirements that are already satisfied.
    remaining_cmd = [str(pip_exe), "install", "--timeout", "300", "-r", str(requirements_file)]
    if not run_command(remaining_cmd, "Installing remaining dependencies", retries=1, timeout=600):
        print("⚠️ Some dependencies may not be installed. Check manually.")

# Ask the venv's pip about each key package; a zero exit status from
# `pip show` is treated as "installed"
key_packages = ["torch", "numpy", "opencv-python", "mediapipe"]
print("\n🔍 Verifying key packages...")
for package in key_packages:
    show_cmd = [str(pip_exe), "show", package]
    try:
        found = subprocess.run(show_cmd, capture_output=True, text=True).returncode == 0
    except Exception as e:
        # Verification is best-effort: report the problem and move on
        print(f"⚠️ Could not verify {package}: {e}")
        continue
    print(f"✅ {package} installed" if found else f"⚠️ {package} not found")

# Closing summary: where things live and what to do next
summary_lines = [
    "\n✅ Setup complete!",
    f"📁 Project: {project_root}",
    f"🐍 Virtual environment: {venv_path}",
    f"📁 Upload videos to: {project_root}/data/raw/",
    f"📁 Output will go to: {project_root}/data/processed/",
    "\n🚀 Next: Run run_multi_preprocess.ipynb",
    "\n💡 To use this environment in other notebooks:",
    "   1. Restart kernel",
    f"   2. Run: import sys; sys.executable = '{python_exe}'",
    f"   3. Or use: !{python_exe} -m jupyter notebook",
]
for summary_line in summary_lines:
    print(summary_line)

In [None]:
# Test virtual environment functionality
# Relies on `subprocess`, `python_exe`, `pip_exe` and `project_root` from the
# setup cell, so that cell has to run first.
print("🧪 Testing virtual environment...")

# Tests 1 and 2: the venv's python and pip must start and report a version.
# The exit status is checked so a broken executable is not reported as ✅
# with an empty version string.
for exe, label in ((python_exe, "Python"), (pip_exe, "Pip")):
    try:
        result = subprocess.run([str(exe), "--version"], capture_output=True, text=True)
        if result.returncode == 0:
            print(f"✅ {label} version: {result.stdout.strip()}")
        else:
            print(f"❌ {label} version check failed: {result.stderr.strip()}")
    except Exception as e:
        print(f"❌ {label} version check failed: {e}")

# Test 3: import each key package inside the venv interpreter (not this kernel)
print("\n🔍 Testing package imports...")
test_imports = [
    ("torch", "PyTorch"),
    ("numpy", "NumPy"),
    ("cv2", "OpenCV"),
    ("mediapipe", "MediaPipe")
]

# Seconds allowed per import. The first import of a large package may take
# noticeably longer than a few seconds, and a tight limit would report a
# working install as broken.
import_timeout = 60

# repr() yields a correctly quoted and escaped Python literal for the path
project_root_literal = repr(str(project_root))

for module, name in test_imports:
    script_lines = ["import sys", f"sys.path.insert(0, {project_root_literal})"]
    if module == "mediapipe":
        # Select matplotlib's Agg backend before MediaPipe is imported
        # (headless environment fix)
        script_lines += [
            "import os",
            "os.environ['MPLBACKEND'] = 'Agg'",
            "import matplotlib",
            "matplotlib.use('Agg')",
        ]
    # The child prints nothing: only its exit status and stderr are used
    script_lines.append(f"import {module}")
    test_script = "\n".join(script_lines)

    try:
        result = subprocess.run([str(python_exe), "-c", test_script],
                                capture_output=True, text=True, timeout=import_timeout)
        if result.returncode == 0:
            print(f"✅ {name} import test passed")
        else:
            print(f"❌ {name} import test failed: {result.stderr}")
    except subprocess.TimeoutExpired:
        print(f"❌ {name} import test error: timed out after {import_timeout}s")
    except Exception as e:
        print(f"❌ {name} import test error: {e}")

print("\n🎯 Virtual environment test complete!")


## Troubleshooting

### Common Issues:

**1. Virtual environment not working:**
```bash
# Recreate virtual environment
rm -rf /workspace/venv
python -m venv /workspace/venv
```

**2. Package installation fails (network timeout):**
```bash
# Update pip first with timeout
/workspace/venv/bin/pip install --upgrade pip --timeout 300

# Try with extended timeout and retries
/workspace/venv/bin/pip install --timeout 600 --retries 5 --no-cache-dir -r /workspace/fslr-transformer-vs-iv3gru/requirements.txt

# If still failing, install packages individually
/workspace/venv/bin/pip install torch torchvision --timeout 600
/workspace/venv/bin/pip install opencv-python mediapipe --timeout 600
```

**3. MediaPipe matplotlib backend error:**
```python
# Fix matplotlib backend before importing MediaPipe
import os
import matplotlib
os.environ['MPLBACKEND'] = 'Agg'
matplotlib.use('Agg')
import mediapipe  # Now this should work
```

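**4. Import errors in notebooks:**
- Restart kernel
- Run: `import sys; sys.executable = '/workspace/venv/bin/python'`
  - Note: this only changes the interpreter path recorded in `sys`; it does not make packages installed in the venv importable from an already-running kernel. If imports still fail, run the code with the venv interpreter directly, e.g. `!/workspace/venv/bin/python your_script.py`.
- Or use: `!/workspace/venv/bin/python -m jupyter notebook`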

**5. Permission issues:**
```bash
# Fix permissions
chmod +x /workspace/venv/bin/python
chmod +x /workspace/venv/bin/pip
```

### Verification Commands:
```bash
# Check virtual environment
/workspace/venv/bin/python --version
/workspace/venv/bin/pip list

# Test imports
/workspace/venv/bin/python -c "import torch; print('PyTorch OK')"
/workspace/venv/bin/python -c "import cv2; print('OpenCV OK')"
/workspace/venv/bin/python -c "import os; os.environ['MPLBACKEND']='Agg'; import matplotlib; matplotlib.use('Agg'); import mediapipe; print('MediaPipe OK')"
```


In [None]:
# Fix matplotlib backend for MediaPipe (run this in any notebook that uses MediaPipe)
import os
import matplotlib

# Headless environments: select the Agg backend both through the environment
# variable and on the matplotlib module that was just imported
os.environ.update(MPLBACKEND='Agg')
matplotlib.use(os.environ['MPLBACKEND'])

print(
    "✅ Matplotlib backend set to Agg for headless environment\n"
    "💡 Run this cell before importing MediaPipe in any notebook"
)


## Manual Installation (if automatic setup fails)

If you encounter network timeout issues, try these manual steps:

### Option 1: Install packages individually
```bash
# Update pip first
/workspace/venv/bin/pip install --upgrade pip --timeout 300

# Install core packages one by one
/workspace/venv/bin/pip install torch torchvision --timeout 600
/workspace/venv/bin/pip install numpy opencv-python --timeout 300
/workspace/venv/bin/pip install mediapipe --timeout 600
/workspace/venv/bin/pip install tqdm pyarrow --timeout 300
/workspace/venv/bin/pip install pandas plotly streamlit --timeout 300
/workspace/venv/bin/pip install scikit-learn matplotlib seaborn --timeout 300
```

### Option 2: Use alternative PyPI mirrors
```bash
# Try different PyPI mirrors if main one is slow
/workspace/venv/bin/pip install -i https://pypi.douban.com/simple/ torch torchvision
/workspace/venv/bin/pip install -i https://pypi.tuna.tsinghua.edu.cn/simple/ opencv-python mediapipe
```

### Option 3: Install without dependencies first
```bash
# Install packages without dependencies, then resolve
/workspace/venv/bin/pip install --no-deps torch torchvision
/workspace/venv/bin/pip install --no-deps opencv-python mediapipe
# Then install remaining dependencies
/workspace/venv/bin/pip install -r /workspace/fslr-transformer-vs-iv3gru/requirements.txt
```
