## 1. Clone the repository

In [None]:
!git clone https://github.com/cyrilsofdevpro/SofAi.git
!ls -la
!ls -la SofAi
!ls -la SofAi/backend

In [None]:
import os
from IPython.display import display, Markdown

# Enable real model load and use Qwen2.5-0.5B-Instruct for quality Q&A (instruction-tuned, no loops)
os.environ['LOAD_REAL_MODEL'] = '1'
# Qwen2.5-0.5B-Instruct: small, instruction-tuned, free, coherent responses
os.environ['MODEL_NAME'] = os.environ.get('MODEL_NAME', 'Qwen/Qwen2.5-0.5B-Instruct')

display(Markdown(f"**LOAD_REAL_MODEL**={os.environ['LOAD_REAL_MODEL']}, **MODEL_NAME**={os.environ['MODEL_NAME']}"))

# Confirm GPU availability
print('Checking GPU (nvidia-smi):')
!nvidia-smi


## 2. Install dependencies (minimal set for fast startup)

In [None]:
%cd SofAi/backend
# Install only what we need for a quick demo (SKIP_MODEL_LOAD=1)
!pip install --quiet fastapi uvicorn pyngrok flask-cors

## 3. Start FastAPI backend (real model if `LOAD_REAL_MODEL=1`, otherwise a fast dummy model)

In [None]:
import os, time, subprocess, sys
from pathlib import Path

# If you want the notebook to load a real Hugging Face model (not dry-run),
# set the notebook environment variable `LOAD_REAL_MODEL=1` before running this cell.
# Example in a separate cell or runtime: import os; os.environ['LOAD_REAL_MODEL'] = '1'

load_real = os.environ.get("LOAD_REAL_MODEL", "0") in ("1", "true", "True")
if load_real:
    print("LOAD_REAL_MODEL=1 -> installing HF dependencies (this may take several minutes)...")
    # Install common HF deps. On Colab you should use a GPU runtime for larger models.
    # Quiet install to reduce output; installation may still take several minutes.
    get_ipython().system('pip install -q "torch" "transformers[torch]" accelerate peft safetensors')

# Locate the backend folder (robust to different clone locations)
search_paths = [Path("/content"), Path("/root"), Path('.')]
backend_path = None
for root in search_paths:
    try:
        for p in root.rglob("backend"):
            if (p / "main.py").exists():
                backend_path = p
                break
        if backend_path:
            break
    except Exception:
        continue

if not backend_path:
    # Fallback to common path used earlier
    backend_path = Path('/content/SofAi/backend')

print(f"Using backend path: {backend_path}")
if not backend_path.exists():
    raise FileNotFoundError(f"backend path not found: {backend_path}. Did you clone the repo?")

os.chdir(str(backend_path))
print(f"Current directory: {os.getcwd()}")
print(f"Files in current directory: {os.listdir('.')}")

# Set environment variable to control model loading in the server
# SKIP_MODEL_LOAD=1 -> dry-run dummy model (fast)
# SKIP_MODEL_LOAD=0 or unset -> load the real HF model (may be slow and require GPU)
if load_real:
    os.environ["SKIP_MODEL_LOAD"] = "0"
else:
    os.environ["SKIP_MODEL_LOAD"] = "1"

# Kill any existing uvicorn process
os.system("pkill -f 'uvicorn' 2>/dev/null || true")
# short pause
time.sleep(1)

# Start uvicorn in background: bind to 0.0.0.0 so ngrok can reach it
print("Starting uvicorn backend...")
get_ipython().system_raw(
    f'cd "{backend_path}" && python -m uvicorn main:app --host 0.0.0.0 --port 8000 --log-level info > uvicorn.log 2>&1 &'
)

# Wait for server to start and show startup logs
time.sleep(5)
print("\n=== Uvicorn Startup Logs ===")
!tail -n 300 uvicorn.log

## 4. Verify backend is listening locally

In [None]:
import time
time.sleep(1)

print("Checking if backend is listening...")
!curl -sS http://127.0.0.1:8000/health && echo "\n✓ /health endpoint working" || echo "✗ /health failed"

print("\nTesting /chat endpoint...")
!curl -sS -X POST http://127.0.0.1:8000/chat \
  -H "Content-Type: application/json" \
  -d '{"message":"hello from colab","session_id":"test123"}' && echo "\n✓ /chat endpoint working" || echo "✗ /chat failed"

## 5. Start ngrok tunnel (public URL)

In [None]:
from pyngrok import ngrok
from getpass import getpass

# Ask for the authtoken interactively so it is never stored in the notebook.
print("Enter your ngrok authtoken (from https://dashboard.ngrok.com/auth/your-authtoken):")
token = getpass("ngrok authtoken (will not echo): ").strip()

if token:
    ngrok.set_auth_token(token)
else:
    print("⚠️ No token entered. Using default (may have connection limits).")

# Re-running this cell used to open an additional tunnel every time. Reuse an
# existing https tunnel instead.
# NOTE(review): assumes any tunnel already open in this session was created by
# this notebook for port 8000 — confirm if other tunnels are in use.
existing_https = [t for t in ngrok.get_tunnels() if t.public_url.startswith("https://")]
if existing_https:
    print("Reusing existing ngrok tunnel...")
    tunnel = existing_https[0]
else:
    # Create tunnel: port 8000, TLS enabled for security
    print("Creating ngrok tunnel...")
    tunnel = ngrok.connect(8000, bind_tls=True)
PUBLIC_URL = tunnel.public_url

print(f"\n✓ Public ngrok URL: {PUBLIC_URL}")
print("\n  Use this URL to access your backend from anywhere:")
print(f"    Health check: {PUBLIC_URL}/health")
print(f"    Chat endpoint: {PUBLIC_URL}/chat (POST)")

## 6. Test public endpoint via ngrok

In [None]:
import subprocess
import requests

from pyngrok import ngrok

HEALTH_TIMEOUT_S = 5
# Generation with a real model can take noticeably longer than a health probe.
CHAT_TIMEOUT_S = 60


def _probe(name, method, url, timeout, **request_kwargs):
    """Send one request to ``url`` and print the outcome.

    The check passes only when the server answers with a non-error status and a
    JSON body; HTTP 4xx/5xx responses, network errors and non-JSON bodies are all
    reported as failures. Returns True on success, False otherwise.
    """
    try:
        resp = requests.request(method, url, timeout=timeout, **request_kwargs)
        resp.raise_for_status()
        print(resp.json())
    except (requests.RequestException, ValueError) as e:
        print(f"✗ Public {name} failed: {e}")
        return False
    print(f"✓ Public {name} passed")
    return True


# Pick the public URL from the active tunnels, preferring https when both
# http and https tunnels are listed.
tunnels = ngrok.get_tunnels()
https_urls = [t.public_url for t in tunnels if t.public_url.startswith("https://")]
if https_urls:
    public_url = https_urls[0]
elif tunnels:
    public_url = tunnels[0].public_url
else:
    public_url = None

if public_url:
    print(f"Testing public endpoint: {public_url}\n")

    print("=== Public /health ===")
    _probe("health check", "GET", f"{public_url}/health", HEALTH_TIMEOUT_S)
    print()

    print("=== Public /chat POST ===")
    _probe(
        "chat endpoint",
        "POST",
        f"{public_url}/chat",
        CHAT_TIMEOUT_S,
        json={"message": "test from public url", "session_id": "public_test"},
    )
else:
    print("✗ No ngrok tunnel found. Make sure to run the tunnel creation cell first.")

## 7. (Optional) Check running processes and tunnel status

In [None]:
print("=== Running Processes ===")
!ps aux | grep -E "uvicorn|python" | grep -v grep

print("\n=== ngrok Tunnel Status ===")
!curl -s http://127.0.0.1:4040/api/tunnels | python -m json.tool

## 8. Frontend Setup (if using local React frontend)

If you want to test the React UI locally with this Colab backend:

1. In your local terminal, from `SofAi/frontend`:
   ```bash
   export VITE_API_BASE=https://<your-ngrok-url>  # use the PUBLIC_URL from step 5
   npm run dev
   ```

2. Open http://localhost:3001 in your browser and send messages.

The frontend will call your public ngrok endpoint. CORS is enabled in the FastAPI backend, so cross-origin requests should work.

## 9. Debugging: View backend logs if something fails

In [None]:
# Print the tail of the uvicorn log file from the current working directory;
# handy when one of the endpoint checks above reported a failure.
print("=== Latest Uvicorn Logs ===")
get_ipython().system("tail -n 500 uvicorn.log")

## 10. Stop backend (when done)

Run this cell to cleanly stop uvicorn and ngrok tunnels.

In [None]:
import os
from pyngrok import ngrok

# Shutdown sequence: each entry is (message to print, action to run), executed in order.
shutdown_steps = (
    # pkill matches on the full command line; "|| true" ignores "no process found".
    ("Stopping uvicorn...", lambda: os.system("pkill -f 'uvicorn' 2>/dev/null || true")),
    # ngrok.kill() stops the ngrok process started through pyngrok.
    ("Closing ngrok tunnels...", ngrok.kill),
)

for banner, action in shutdown_steps:
    print(banner)
    action()

print("✓ Backend and ngrok stopped.")