# 🎮 AION GPU Worker - Kaggle

**AUTO-SHUTDOWN:** Desliga automaticamente após 8.5h (30min antes do limite de 9h do Kaggle)

**Setup:**
1. Settings > Accelerator > GPU T4 x2 (ou P100)
2. Preencher variáveis abaixo
3. Run All
4. O worker se registra automaticamente e começa a trabalhar

In [None]:
# ============================================================================
# CONFIGURATION - fill in with your own values
# ============================================================================

AION_URL = "https://your-replit.replit.app"  # Base URL of your AION instance
ACCOUNT_EMAIL = "your-kaggle-account@gmail.com"  # Google account used for Kaggle
# NOTE(review): WORKER_NAME is not referenced by any other cell visible in
# this notebook (the registration payload does not include it) - confirm
# whether it is still needed.
WORKER_NAME = "Kaggle-Account1-T4"  # Unique name for this worker

# Safety limits (DO NOT CHANGE)
MAX_RUNTIME_HOURS = 8.5  # Shut down 30 min before Kaggle's 9h limit
HEARTBEAT_INTERVAL_SECONDS = 60  # Send a heartbeat every 60 s

In [None]:
# ============================================================================
# DEPENDENCY INSTALLATION
# ============================================================================

# IPython shell escape: runs pip quietly (-q) for the packages listed below.
# Of these, the cells visible in this notebook import only pyngrok, requests
# and torch.
!pip install -q pyngrok requests torch transformers peft accelerate bitsandbytes

In [None]:
# ============================================================================
# SETUP NGROK TUNNEL
# ============================================================================

from pyngrok import ngrok
import requests
import time
import threading
from datetime import datetime, timedelta
import subprocess
import sys

# Open an ngrok tunnel to local port 5000 and keep its public URL for the
# registration payload.
print("🌐 Setting up ngrok tunnel...")
tunnel = ngrok.connect(5000)
public_url = tunnel.public_url
print(f"✅ Worker accessible at: {public_url}")

# Detect the GPU: describe CUDA device 0 when available, otherwise fall back
# to placeholder values so the rest of the notebook still runs.
import torch

if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)
else:
    gpu_name = "No GPU"
    vram_gb = 0

print(f"🎮 GPU: {gpu_name} ({vram_gb:.1f} GB VRAM)")

In [None]:
# ============================================================================
# REGISTER WORKER WITH AION
# ============================================================================

print("📝 Registering worker with AION...")

# Capabilities advertised for this worker; GPU name and VRAM come from the
# detection step, VRAM is truncated to whole gigabytes.
worker_capabilities = {
    "tor_enabled": False,
    "model": "llama-3-8b",
    "gpu": gpu_name,
    "vram_gb": int(vram_gb),
    "max_concurrent": 4,
}

registration_data = {
    "provider": "kaggle",
    "accountId": ACCOUNT_EMAIL,
    "ngrokUrl": public_url,
    "capabilities": worker_capabilities,
}

# Any failure here (network error, HTTP error status, unexpected response
# shape) aborts the cell: without a WORKER_ID the later cells cannot work.
try:
    response = requests.post(f"{AION_URL}/api/gpu/workers/register", json=registration_data, timeout=10)
    response.raise_for_status()
    worker_data = response.json()
    WORKER_ID = worker_data["worker"]["id"]
    print(f"✅ Registered successfully! Worker ID: {WORKER_ID}")
except Exception as e:
    print(f"❌ Registration failed: {e}")
    print("⚠️  Make sure AION_URL is correct and AION is running!")
    sys.exit(1)

In [None]:
# ============================================================================
# AUTO-SHUTDOWN SYSTEM (Kaggle limit: 9h)
# ============================================================================

# Wall-clock reference for the runtime budget, taken when this cell runs.
start_time = datetime.now()
# Moment at which the monitor thread triggers the shutdown.
shutdown_time = start_time + timedelta(hours=MAX_RUNTIME_HOURS)
# Shared run flag: the shutdown monitor clears it; the heartbeat loop and the
# final keep-alive loop poll it.
is_running = True

def auto_shutdown_monitor():
    """Watch the runtime budget and shut the session down before the limit.

    Loops while the module-level ``is_running`` flag is set, waking up once a
    minute to compare the wall clock with ``start_time`` / ``shutdown_time``:

    * prints a progress line once per elapsed hour (including at start);
    * prints one warning at one hour and one at 30 minutes before shutdown;
    * once ``shutdown_time`` is reached, clears ``is_running``, reports the
      worker as ``offline`` to AION on a best-effort basis and then runs
      ``kill -9 -1`` to terminate the session's processes.
    """
    global is_running

    last_reported_hour = -1  # -1 so the very first pass prints the 0h status
    warned_one_hour = False
    warned_half_hour = False

    while is_running:
        now = datetime.now()
        elapsed = (now - start_time).total_seconds() / 3600
        remaining = (shutdown_time - now).total_seconds() / 3600

        # Report on whole-hour changes rather than on an exact minute value:
        # the loop period is slightly longer than 60 s, so a specific minute
        # can be skipped and the hourly line would silently go missing.
        elapsed_hours = int(elapsed)
        if elapsed_hours != last_reported_hour:
            last_reported_hour = elapsed_hours
            print(f"⏱️  Runtime: {elapsed:.1f}h / {MAX_RUNTIME_HOURS}h (remaining: {remaining:.1f}h)")

        # Each warning window is ~3 minutes wide, i.e. several loop passes;
        # the flags make sure each warning is printed only once.
        if not warned_one_hour and 0.95 < remaining <= 1.0:
            warned_one_hour = True
            print("⚠️  WARNING: 1 hour until auto-shutdown!")

        if not warned_half_hour and 0.45 < remaining <= 0.5:
            warned_half_hour = True
            print("⚠️  WARNING: 30 minutes until auto-shutdown!")

        if now >= shutdown_time:
            print("\n" + "="*60)
            print("🛑 AUTO-SHUTDOWN TRIGGERED")
            print(f"Runtime: {elapsed:.2f}h (limit: {MAX_RUNTIME_HOURS}h)")
            print("Kaggle quota preserved! ✅")
            print("="*60 + "\n")

            is_running = False

            # Best effort only: whatever goes wrong here must never prevent
            # the kill below, otherwise the session keeps consuming quota.
            # The failure is reported instead of being silently dropped.
            try:
                requests.post(
                    f"{AION_URL}/api/gpu/workers/heartbeat",
                    json={"workerId": WORKER_ID, "status": "offline"},
                    timeout=5
                )
            except Exception as e:
                print(f"⚠️  Offline notification failed: {e}")

            # SIGKILL to every process this user is allowed to signal.
            subprocess.call(["kill", "-9", "-1"])
            break

        time.sleep(60)

# Run the monitor in the background; as a daemon thread it does not block
# interpreter exit.
shutdown_thread = threading.Thread(target=auto_shutdown_monitor)
shutdown_thread.daemon = True
shutdown_thread.start()

# Tell the user which limit is in force and when it will be hit.
print(f"⏰ Auto-shutdown configured: {MAX_RUNTIME_HOURS}h runtime limit")
print(f"🛑 Will shutdown at: {shutdown_time.strftime('%H:%M:%S')}")

In [None]:
# ============================================================================
# HEARTBEAT SYSTEM
# ============================================================================

def send_heartbeat():
    """Periodically tell AION that this worker is still alive.

    While the module-level ``is_running`` flag is set, POSTs the worker id to
    the AION heartbeat endpoint and then sleeps for
    ``HEARTBEAT_INTERVAL_SECONDS``. A failed heartbeat (network error or an
    HTTP error status) is printed and the loop carries on with the next one.
    """
    while is_running:
        try:
            response = requests.post(
                f"{AION_URL}/api/gpu/workers/heartbeat",
                json={"workerId": WORKER_ID},
                timeout=10
            )
            # requests does not raise on 4xx/5xx by itself; without this a
            # heartbeat rejected by AION would go completely unnoticed.
            response.raise_for_status()
        except Exception as e:
            # Broad on purpose: this is the top of a background thread and a
            # single bad heartbeat must not end the loop.
            print(f"⚠️  Heartbeat failed: {e}")

        time.sleep(HEARTBEAT_INTERVAL_SECONDS)

# Send heartbeats from a background daemon thread so the notebook cells can
# keep executing.
heartbeat_thread = threading.Thread(target=send_heartbeat)
heartbeat_thread.daemon = True
heartbeat_thread.start()

print("💓 Heartbeat started")

In [None]:
# ============================================================================
# WORKER READY!
# ============================================================================

# Summary banner with the details of the registered worker.
banner = "=" * 60

print("\n" + banner)
print("✅ KAGGLE WORKER IS ONLINE!")
print(banner)
print(f"Worker ID: {WORKER_ID}")
print(f"Account: {ACCOUNT_EMAIL}")
print(f"GPU: {gpu_name}")
print(f"Auto-shutdown: {shutdown_time.strftime('%H:%M:%S')}")
print(banner)
print("\n🎮 Processing AION jobs automatically...")
print("🛑 Auto-shutdown in {:.1f} hours\n".format(MAX_RUNTIME_HOURS))

# Keep this cell running until the shutdown monitor clears is_running,
# re-checking the flag once a minute.
while True:
    if not is_running:
        break
    time.sleep(60)