# 🎮 AION GPU Worker - Google Colab

**AUTO-SHUTDOWN:** Desliga automaticamente após 10h (`MAX_RUNTIME_HOURS`, 2h antes do limite de 12h do Colab)

**Setup:**
1. Runtime > Change runtime type > GPU (T4)
2. Preencher variáveis abaixo
3. Run All (Ctrl+F9)
4. Worker registra automaticamente e começa a trabalhar

In [None]:
# ============================================================================
# JAVASCRIPT KEEP-ALIVE (meant to prevent the 90-minute idle timeout)
# ============================================================================
# IMPORTANT: this cell is meant to keep Google Colab from disconnecting the
# session for inactivity. Per the original author, the HTTP heartbeat alone is
# NOT enough because Google requires "user interaction".
#
# Run this cell AND LEAVE THE COLAB TAB OPEN (it may be minimized).
# The JavaScript below simulates user activity every 60s (setInterval, 60000 ms).
# ============================================================================

from IPython.display import Javascript

# NOTE: `display` is not imported in this cell; presumably it is provided by
# the notebook environment - confirm if this is ever run outside Colab/Jupyter.
# NOTE: inside the script, the "✓ Simulated user interaction" console line is
# logged only when the "#connect" button is found in the shadow root of
# "#top-toolbar > colab-connect-button"; the synthetic `mousemove` event is
# dispatched on every tick regardless.
display(Javascript('''
function ClickConnect() {
    console.log("[AION Keep-Alive] Preventing idle timeout...");
    
    // Tenta clicar no botão de conexão (se existir)
    const connectBtn = document.querySelector("#top-toolbar > colab-connect-button");
    if (connectBtn && connectBtn.shadowRoot) {
        const btn = connectBtn.shadowRoot.querySelector("#connect");
        if (btn) {
            btn.click();
            console.log("[AION Keep-Alive] ✓ Simulated user interaction");
        }
    }
    
    // Fallback: Mover o mouse virtualmente
    document.dispatchEvent(new MouseEvent('mousemove', {
        view: window,
        bubbles: true,
        cancelable: true
    }));
}

// Executar a cada 60 segundos (1 minuto)
const keepAliveInterval = setInterval(ClickConnect, 60000);

console.log("[AION Keep-Alive] ✅ Started! Session will stay active.");
console.log("[AION Keep-Alive] Simulating user interaction every 60 seconds...");
'''))

# User-facing instructions (printed in the cell output, in Portuguese).
# NOTE(review): step 3 tells the user to expect the "✓" console line every
# 60s, but that line is only emitted when the connect button is found; the
# "Preventing idle timeout..." line is the one logged on every tick.
print("✅ JavaScript Keep-Alive ATIVADO!")
print("⚠️  IMPORTANTE: Deixe esta aba do Colab ABERTA (pode minimizar o navegador)")
print("   Se fechar a aba, o JavaScript para e o Colab desliga em ~90min")
print("")
print("🔍 Para verificar se está funcionando:")
print("   1. Pressione F12 (abrir DevTools)")
print("   2. Vá na aba Console")
print("   3. Deve aparecer '[AION Keep-Alive] ✓ Simulated user interaction' a cada 60s")

In [None]:
# ============================================================================
# CONFIGURATION - fill in with your own data
# ============================================================================

import os

AION_URL = "https://ff2e6297-cbf6-4c2e-ac26-3872e4c9c3ae-00-1vce3mtitdkqj.janeway.replit.dev"  # AION base URL (Replit Dev URL)
ACCOUNT_EMAIL = "your-google-account@gmail.com"  # Google account of this Colab
WORKER_NAME = "Colab-Account1-T4"  # Unique name for this worker (not referenced by the other cells visible here)

# Ngrok authtoken, read from the NGROK_AUTH_TOKEN environment variable; falls
# back to an empty string when the variable is not set.
# NOTE(review): the original comment says the token is already configured in
# AION's secrets - confirm the variable is actually set in the Colab runtime.
NGROK_AUTHTOKEN = os.environ.get('NGROK_AUTH_TOKEN', '')  # Auto-filled

# Safety limits (DO NOT CHANGE unless you know what you are doing)
MAX_RUNTIME_HOURS = 10.0  # Shuts down 2h before Colab's 12h limit
HEARTBEAT_INTERVAL_SECONDS = 60  # Sends a heartbeat every 60s

In [None]:
# ============================================================================
# DEPENDENCY INSTALLATION
# ============================================================================
# NOTE(review): later cells also import `flask` and `pytz`, which are not
# listed here - presumably they are preinstalled in the Colab image; confirm.

!pip install -q pyngrok requests torch transformers peft accelerate bitsandbytes

In [None]:
# ============================================================================
# FLASK SERVER SETUP (for the /health endpoint)
# ============================================================================

from flask import Flask, jsonify
from pyngrok import ngrok
import requests
import time
import threading
from datetime import datetime, timedelta
import pytz
import subprocess
import sys

# Create the Flask app
app = Flask(__name__)
# Timezone-aware (America/Sao_Paulo) timestamp used as the session start when
# computing runtime. It is assigned again in the auto-shutdown cell.
start_time = datetime.now(pytz.timezone("America/Sao_Paulo"))

@app.route('/health', methods=['GET'])
def health_check():
    """Report worker status and session runtime so AION can poll this worker.

    Returns:
        A Flask JSON response with the keys ``status``, ``sessionRuntimeHours``,
        ``maxSessionHours``, ``utilizationPercentage``, ``gpu``, ``provider``
        and ``timestamp`` (ISO 8601, including the UTC offset).
    """
    # start_time is timezone-aware, so "now" must be aware too: subtracting an
    # aware datetime from a naive one raises TypeError. Use the same timezone
    # as the rest of the notebook.
    now = datetime.now(pytz.timezone("America/Sao_Paulo"))
    session_runtime_hours = (now - start_time).total_seconds() / 3600

    return jsonify({
        "status": "healthy",
        "sessionRuntimeHours": round(session_runtime_hours, 2),
        "maxSessionHours": MAX_RUNTIME_HOURS,
        "utilizationPercentage": round((session_runtime_hours / MAX_RUNTIME_HOURS) * 100, 1),
        # gpu_name is only defined after the GPU-detection step has run.
        "gpu": gpu_name if 'gpu_name' in globals() else "Unknown",
        "provider": "colab",
        "timestamp": now.isoformat()
    })

# Serve the Flask app from a background thread so the cell does not block
def run_flask():
    """Run the Flask development server on all interfaces, port 5000 (blocking)."""
    server_options = {
        "host": '0.0.0.0',
        "port": 5000,
        "debug": False,
        "use_reloader": False,
    }
    app.run(**server_options)

# Daemon thread: it does not keep the interpreter alive on its own.
flask_thread = threading.Thread(daemon=True, target=run_flask)
flask_thread.start()

# Give Flask a moment to start before reporting success
time.sleep(2)
print("✅ Flask server started on port 5000")

# Authenticate ngrok (REQUIRED)
print("🔑 Authenticating ngrok...")

# NGROK_AUTHTOKEN defaults to an empty string when the environment variable is
# missing. Reject that case explicitly instead of reporting a successful
# authentication and failing later when the tunnel is opened.
if not NGROK_AUTHTOKEN:
    print("❌ Ngrok auth failed: NGROK_AUTHTOKEN is empty")
    print("Get your authtoken at: https://dashboard.ngrok.com/get-started/your-authtoken")
    sys.exit(1)

try:
    ngrok.set_auth_token(NGROK_AUTHTOKEN)
    print("✅ Ngrok authenticated successfully!")
except Exception as e:
    print(f"❌ Ngrok auth failed: {e}")
    print("Get your authtoken at: https://dashboard.ngrok.com/get-started/your-authtoken")
    sys.exit(1)

# Set up the ngrok tunnel to the local Flask server (port 5000)
print("🌐 Setting up ngrok tunnel...")
try:
    public_url = ngrok.connect(5000).public_url
except Exception as e:
    # NOTE(review): an invalid token is presumably only detected here, when
    # the tunnel is actually opened - confirm against the pyngrok docs.
    print(f"❌ Ngrok tunnel failed: {e}")
    print("Get your authtoken at: https://dashboard.ngrok.com/get-started/your-authtoken")
    sys.exit(1)
print(f"✅ Worker accessible at: {public_url}")

# Detect the GPU (name and total VRAM in GiB); fall back to placeholders
# when CUDA is not available.
import torch

if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
else:
    gpu_name = "No GPU"
    vram_gb = 0

print(f"🎮 GPU: {gpu_name} ({vram_gb:.1f} GB VRAM)")

In [None]:
# ============================================================================
# REGISTER WORKER WITH AION
# ============================================================================

print("📝 Registering worker with AION...")

# Capabilities advertised to AION for this worker.
_capabilities = {
    "tor_enabled": False,
    "model": "llama-3-8b",
    "gpu": gpu_name,
    "vram_gb": int(vram_gb),  # truncated to whole GB
    "max_concurrent": 4
}

# Registration payload: provider, owning account, public tunnel URL.
registration_data = {
    "provider": "colab",
    "accountId": ACCOUNT_EMAIL,
    "ngrokUrl": public_url,
    "capabilities": _capabilities
}

try:
    _register_endpoint = f"{AION_URL}/api/gpu/workers/register"
    response = requests.post(_register_endpoint, json=registration_data, timeout=10)
    # Treat any non-2xx answer as a failed registration.
    response.raise_for_status()
    worker_data = response.json()
    # The worker id is read from the "worker" -> "id" field of the response.
    WORKER_ID = worker_data["worker"]["id"]
    print(f"✅ Registered successfully! Worker ID: {WORKER_ID}")
except Exception as e:
    # Any failure (network, HTTP status, unexpected JSON shape) aborts the run.
    print(f"❌ Registration failed: {e}")
    print("⚠️  Make sure AION_URL is correct and AION is running!")
    sys.exit(1)

In [None]:
# ============================================================================
# AUTO-SHUTDOWN SYSTEM (stops MAX_RUNTIME_HOURS after this cell runs)
# ============================================================================

# Resets the session start to the moment this cell runs (it replaces the value
# assigned when the Flask app was created).
start_time = datetime.now(pytz.timezone("America/Sao_Paulo"))
# Absolute time at which the runtime is killed.
shutdown_time = start_time + timedelta(hours=MAX_RUNTIME_HOURS)
# Flag read by the background loops; set to False when shutdown triggers.
is_running = True

def auto_shutdown_monitor():
    """Watch the session clock and kill the runtime once the limit is reached.

    Loops while ``is_running`` is true, checking once per minute. It prints a
    progress line once per elapsed hour, a single warning when about one hour
    remains and another when about 30 minutes remain. At ``shutdown_time`` it
    clears ``is_running``, tells AION the worker is offline (best effort) and
    then kills the runtime's processes with ``kill -9 -1``.
    """
    global is_running

    last_logged_hour = None   # last whole elapsed hour that was logged
    warned_one_hour = False   # one-shot flags so each warning prints once
    warned_half_hour = False

    while is_running:
        now = datetime.now(pytz.timezone("America/Sao_Paulo"))
        elapsed = (now - start_time).total_seconds() / 3600  # hours
        remaining = (shutdown_time - now).total_seconds() / 3600

        # Log once per elapsed hour. Tracking the last logged hour avoids the
        # duplicate or skipped lines a "minute % 60 == 0" test produces when
        # the 60s sleep drifts.
        elapsed_hour = int(elapsed)
        if elapsed_hour != last_logged_hour:
            last_logged_hour = elapsed_hour
            print(f"⏱️  Runtime: {elapsed:.1f}h / {MAX_RUNTIME_HOURS}h (remaining: {remaining:.1f}h)")

        # Warn 1h before shutdown (once; the window spans several checks)
        if not warned_one_hour and 0.95 < remaining <= 1.0:
            warned_one_hour = True
            print("⚠️  WARNING: 1 hour until auto-shutdown!")

        # Warn 30min before shutdown (once)
        if not warned_half_hour and 0.45 < remaining <= 0.5:
            warned_half_hour = True
            print("⚠️  WARNING: 30 minutes until auto-shutdown!")

        # SHUTDOWN
        if now >= shutdown_time:
            print("\n" + "="*60)
            print("🛑 AUTO-SHUTDOWN TRIGGERED")
            print(f"Runtime: {elapsed:.2f}h (limit: {MAX_RUNTIME_HOURS}h)")
            print("This prevents hitting Google's quota limits.")
            print("="*60 + "\n")

            is_running = False

            # Tell AION the worker is going offline. This is best effort: any
            # failure is reported but must never prevent the shutdown below.
            try:
                requests.post(
                    f"{AION_URL}/api/gpu/workers/heartbeat",
                    json={"workerId": WORKER_ID, "status": "offline"},
                    timeout=5
                )
            except Exception as e:
                print(f"⚠️  Could not notify AION before shutdown: {e}")

            # Kill the Colab runtime: SIGKILL to every process this user may signal
            subprocess.call(["kill", "-9", "-1"])
            break

        time.sleep(60)  # Check once per minute

# Start the auto-shutdown monitor on a daemon thread
shutdown_thread = threading.Thread(target=auto_shutdown_monitor, daemon=True)
shutdown_thread.start()

# Report the configured limit and the wall-clock shutdown time
# (shutdown_time is in the America/Sao_Paulo timezone).
print(f"⏰ Auto-shutdown configured: {MAX_RUNTIME_HOURS}h runtime limit")
print(f"🛑 Will shutdown at: {shutdown_time.strftime('%H:%M:%S') + ' (São Paulo)'}")

In [None]:
# ============================================================================
# HEARTBEAT SYSTEM (mantém conexão com AION)
# ============================================================================

def send_heartbeat():
    """Post a heartbeat with runtime info to AION until ``is_running`` is false.

    One heartbeat is sent every ``HEARTBEAT_INTERVAL_SECONDS`` seconds. Each
    carries the worker id, the hours elapsed since ``start_time`` and the
    configured maximum session length. Failures are printed and the loop
    continues.
    """
    while is_running:
        try:
            # Compute the current session runtime
            now = datetime.now(pytz.timezone("America/Sao_Paulo"))
            session_runtime_hours = (now - start_time).total_seconds() / 3600

            response = requests.post(
                f"{AION_URL}/api/gpu/workers/heartbeat",
                json={
                    "workerId": WORKER_ID,
                    "sessionRuntimeHours": round(session_runtime_hours, 2),
                    "maxSessionHours": MAX_RUNTIME_HOURS
                },
                timeout=10
            )
            # Surface HTTP-level rejections (4xx/5xx) instead of silently
            # treating them as successful heartbeats.
            response.raise_for_status()
        except Exception as e:
            # Keep the loop alive: one failed heartbeat must not stop the worker.
            print(f"⚠️  Heartbeat failed: {e}")

        time.sleep(HEARTBEAT_INTERVAL_SECONDS)

# Start the heartbeat loop on a daemon thread
heartbeat_thread = threading.Thread(target=send_heartbeat, daemon=True)
heartbeat_thread.start()

# NOTE: the message hard-codes "60s"; the actual interval is
# HEARTBEAT_INTERVAL_SECONDS.
print("💓 Heartbeat started (60s interval) with runtime tracking")

In [None]:
# ============================================================================
# WORKER IS READY!
# ============================================================================

_banner = "="*60

# Summary of the running worker
print("\n" + _banner)
print("✅ WORKER IS ONLINE AND READY!")
print(_banner)
print(f"Worker ID: {WORKER_ID}")
print(f"Account: {ACCOUNT_EMAIL}")
print(f"GPU: {gpu_name}")
print(f"Public URL: {public_url}")
print(f"Auto-shutdown: {shutdown_time.strftime('%H:%M:%S') + ' (São Paulo)'}")
print(_banner)
# NOTE(review): no job-processing code is visible in this notebook; the cells
# shown only serve /health and send heartbeats - confirm where jobs are handled.
print("\n🎮 Worker will now process AION jobs automatically.")
print("🛑 Auto-shutdown will trigger in {:.1f} hours\n".format(MAX_RUNTIME_HOURS))

# Keep the notebook cell (and its daemon threads) alive until shutdown
while True:
    if not is_running:
        break
    time.sleep(60)