# AION - LoRA Fine-Tuning Worker (Google Colab)

Este notebook:
1. Conecta ao servidor AION principal
2. Baixa datasets automaticamente
3. Treina modelos com LoRA (Parameter-Efficient Fine-Tuning)
4. Serve o modelo via API (Ngrok)
5. Envia heartbeat para o AION saber que está online

## Configuração Inicial

In [None]:
# Configuration variables for this worker.
# AION_SERVER_URL is the base URL of the main AION server; the register,
# heartbeat and training-complete requests are posted under it.
AION_SERVER_URL = "https://your-replit-app.replit.dev"  # CHANGE THIS
WORKER_ID = 1  # Unique ID of this worker
PROVIDER = "colab"  # colab, kaggle, or modal

## 1. Instalação de Dependências

In [None]:
# Install the training stack (pinned versions) followed by the helper
# packages used for serving and tunnelling (unpinned).
# The leading "!" makes the notebook run each line as a shell command.
!pip install -q transformers==4.36.2
!pip install -q peft==0.7.1
!pip install -q datasets==2.16.1
!pip install -q accelerate==0.25.0
!pip install -q bitsandbytes==0.41.3
!pip install -q scipy
!pip install -q flask
!pip install -q pyngrok
!pip install -q requests

print("‚úÖ Depend√™ncias instaladas!")

## 2. Configurar Ngrok (Para expor API publicamente)

In [None]:
from pyngrok import ngrok
import os

# GET YOUR FREE TOKEN AT: https://dashboard.ngrok.com/get-started/your-authtoken
NGROK_TOKEN = ""  # PASTE YOUR TOKEN HERE

# Only register the token with pyngrok when one was provided; otherwise
# just warn, so the cell still runs without a token.
if NGROK_TOKEN:
    ngrok.set_auth_token(NGROK_TOKEN)
    print("‚úÖ Ngrok configurado!")
else:
    print("‚ö†Ô∏è  Configure NGROK_TOKEN para conectar ao AION")

## 3. Função de Download de Dataset do AION

In [None]:
import requests
import json

def download_dataset(dataset_url, output_path="/content/dataset.jsonl", timeout=60):
    """Download a JSONL dataset and count the examples it contains.

    Args:
        dataset_url: URL the dataset is fetched from with an HTTP GET.
        output_path: Local file the response body is written to.
        timeout: Seconds to wait for the server to connect or to send data
            before the request is aborted.

    Returns:
        A tuple ``(output_path, num_examples)`` where ``num_examples`` is the
        number of non-blank lines in the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    print(f"📥 Baixando dataset de {dataset_url}...")

    # Using the response as a context manager releases the streamed
    # connection even when the status check or the file write fails.
    with requests.get(dataset_url, stream=True, timeout=timeout) as response:
        response.raise_for_status()

        with open(output_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

    # Count in binary mode so the result does not depend on the platform's
    # default text encoding, and skip blank lines (they are not examples).
    with open(output_path, 'rb') as f:
        num_examples = sum(1 for line in f if line.strip())

    print(f"✅ Dataset baixado: {num_examples} exemplos em {output_path}")
    return output_path, num_examples

## 4. Função de Fine-Tuning com LoRA

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset

def _build_alpaca_text(instruction, output, context=None):
    """Render one example as an Alpaca-style prompt/response training string."""
    if context:
        return (
            f"### Instruction:\n{instruction}\n\n"
            f"### Input:\n{context}\n\n"
            f"### Response:\n{output}"
        )
    return f"### Instruction:\n{instruction}\n\n### Response:\n{output}"


def train_lora_model(
    dataset_path,
    base_model="meta-llama/Llama-2-7b-chat-hf",  # or "mistralai/Mistral-7B-v0.1"
    lora_r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    epochs=3,
    batch_size=4,
    learning_rate=2e-4,
    output_dir="/content/lora_model"
):
    """Fine-tune a causal language model with LoRA adapters on a JSON dataset.

    The base model is loaded in 4-bit, LoRA adapters are attached to the
    ``q_proj`` and ``v_proj`` modules, and the dataset is rendered into
    Alpaca-style text before training with the Hugging Face ``Trainer``.

    Args:
        dataset_path: Path of the JSON/JSONL file passed to ``load_dataset``.
            Each record must have ``instruction`` and ``output`` fields; an
            ``input`` field is optional and, when non-empty, is added to the
            prompt as an ``### Input:`` section.
        base_model: Model identifier passed to ``from_pretrained``.
        lora_r: LoRA rank.
        lora_alpha: LoRA alpha scaling value.
        lora_dropout: Dropout applied inside the LoRA layers.
        epochs: Number of training epochs.
        batch_size: Per-device training batch size.
        learning_rate: Learning rate given to ``TrainingArguments``.
        output_dir: Directory for checkpoints and the final saved
            model/tokenizer.

    Returns:
        A tuple ``(model, tokenizer)`` with the trained PEFT model and the
        tokenizer used for training.
    """
    print(f"\n🏋️ Iniciando fine-tuning LoRA...")
    print(f"   Base model: {base_model}")
    print(f"   LoRA r={lora_r}, alpha={lora_alpha}")
    print(f"   Epochs: {epochs}, Batch: {batch_size}, LR: {learning_rate}")

    # 1. Load the tokenizer; the EOS token doubles as the padding token.
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.pad_token = tokenizer.eos_token

    # 2. Load the base model (quantized to 4-bit to save VRAM)
    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        load_in_4bit=True,
        device_map="auto",
        trust_remote_code=True
    )

    # 3. Prepare the quantized model for LoRA training
    model = prepare_model_for_kbit_training(model)

    # 4. Configure LoRA
    lora_config = LoraConfig(
        r=lora_r,
        lora_alpha=lora_alpha,
        target_modules=["q_proj", "v_proj"],  # attention matrices only
        lora_dropout=lora_dropout,
        bias="none",
        task_type="CAUSAL_LM"
    )

    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()

    # 5. Load the dataset
    dataset = load_dataset('json', data_files=dataset_path, split='train')

    # The Alpaca "input" column is optional; check once which shape we have.
    has_context = 'input' in dataset.column_names

    # 6. Preprocess
    def preprocess(examples):
        # Alpaca format: instruction + (optional) input + output
        instructions = examples['instruction']
        outputs = examples['output']
        contexts = examples['input'] if has_context else [None] * len(instructions)
        texts = [
            _build_alpaca_text(instruction, output, context)
            for instruction, output, context in zip(instructions, outputs, contexts)
        ]

        return tokenizer(texts, truncation=True, max_length=512, padding="max_length")

    tokenized_dataset = dataset.map(preprocess, batched=True, remove_columns=dataset.column_names)

    # 7. Configure training
    training_args = TrainingArguments(
        output_dir=output_dir,
        num_train_epochs=epochs,
        per_device_train_batch_size=batch_size,
        gradient_accumulation_steps=4,
        learning_rate=learning_rate,
        fp16=True,
        save_strategy="epoch",
        logging_steps=10,
        report_to="none"
    )

    # 8. Train
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset,
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
    )

    print("\n🚀 Iniciando treinamento...")
    trainer.train()

    # 9. Save the model and tokenizer
    model.save_pretrained(output_dir)
    tokenizer.save_pretrained(output_dir)

    print(f"\n✅ Modelo treinado e salvo em {output_dir}")
    return model, tokenizer

## 5. Servidor API para Inferência

In [None]:
from flask import Flask, request, jsonify
from threading import Thread
import time

app = Flask(__name__)

# Global state shared by the request handlers and the background training thread.
# model / tokenizer stay None until a /train job finishes and assigns them.
model = None
tokenizer = None
# Last known state of the training job; returned verbatim by /health.
training_status = {"status": "idle", "progress": 0}

@app.route('/health', methods=['GET'])
def health():
    """Return the worker identity, model availability and training state as JSON."""
    has_model = model is not None
    payload = {
        "status": "online",
        "worker_id": WORKER_ID,
        "provider": PROVIDER,
        "model_loaded": has_model,
        "training": training_status,
    }
    return jsonify(payload)

@app.route('/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Run inference through an OpenAI-compatible chat completions endpoint.

    Expects a JSON object with ``messages`` (a list of objects carrying
    ``role`` and ``content``) and optional ``max_tokens`` (default 512) and
    ``temperature`` (default 0.7).

    Returns:
        200 with ``choices`` and ``usage`` on success, 400 when the request
        body is malformed, 503 when no model has been loaded yet.
    """
    if model is None or tokenizer is None:
        return jsonify({"error": "Model not loaded"}), 503

    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    messages = data.get('messages', [])
    messages_ok = isinstance(messages, list) and all(
        isinstance(m, dict) and 'role' in m and 'content' in m for m in messages
    )
    if not messages_ok:
        return jsonify({"error": "'messages' must be a list of objects with 'role' and 'content'"}), 400

    try:
        max_tokens = int(data.get('max_tokens', 512))
        # JSON numbers such as 1 arrive as int; the sampler requires a float.
        temperature = float(data.get('temperature', 0.7))
    except (TypeError, ValueError):
        return jsonify({"error": "'max_tokens' and 'temperature' must be numbers"}), 400
    if max_tokens < 1:
        return jsonify({"error": "'max_tokens' must be at least 1"}), 400

    # Convert the messages into a single prompt
    prompt = "\n\n".join([f"{m['role']}: {m['content']}" for m in messages])
    prompt += "\n\nassistant:"

    # Generate the response
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_tokens = inputs['input_ids'].shape[1]

    generation_kwargs = {
        "max_new_tokens": max_tokens,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature > 0:
        generation_kwargs["do_sample"] = True
        generation_kwargs["temperature"] = temperature
    else:
        # Sampling rejects a non-positive temperature; temperature 0 is the
        # usual way clients ask for deterministic output, so decode greedily.
        generation_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **generation_kwargs)

    response = tokenizer.decode(outputs[0][prompt_tokens:], skip_special_tokens=True)
    completion_tokens = outputs.shape[1] - prompt_tokens

    return jsonify({
        "choices": [{
            "message": {
                "role": "assistant",
                "content": response.strip()
            },
            # Hitting the token budget means the answer was cut off.
            "finish_reason": "length" if completion_tokens >= max_tokens else "stop"
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": outputs.shape[1]
        }
    })

@app.route('/train', methods=['POST'])
def train_endpoint():
    """Receive a training job from AION and run it in a background thread.

    Expects a JSON object with ``dataset`` (URL of the dataset to download),
    an optional ``jobId`` and an optional ``lora`` object whose ``r``,
    ``alpha`` and ``dropout`` keys override the LoRA defaults.

    Returns:
        200 with ``{"status": "training_started", "jobId": ...}`` once the
        background thread has been started, or 400 when the body is not a
        JSON object, ``dataset`` is missing, or ``lora`` is not an object.
    """
    global training_status

    # silent=True yields None instead of raising on a missing/invalid JSON body.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    job_id = data.get('jobId')
    dataset_url = data.get('dataset')
    if not dataset_url:
        return jsonify({"error": "Missing 'dataset' URL"}), 400

    config = data.get('lora') or {}
    if not isinstance(config, dict):
        return jsonify({"error": "'lora' must be a JSON object"}), 400

    # NOTE(review): nothing here prevents a second /train request from
    # starting while a job is still running — confirm AION serializes jobs.

    # Run the training in the background
    def train_async():
        global model, tokenizer, training_status
        try:
            training_status = {"status": "downloading", "progress": 10}
            dataset_path, _ = download_dataset(dataset_url)

            training_status = {"status": "training", "progress": 30}
            model, tokenizer = train_lora_model(
                dataset_path,
                lora_r=config.get('r', 8),
                lora_alpha=config.get('alpha', 16),
                lora_dropout=config.get('dropout', 0.05)
            )
        except Exception as e:
            training_status = {"status": "failed", "error": str(e)}
            return

        training_status = {"status": "completed", "progress": 100}

        # Notify AION. This is kept outside the training try-block so that a
        # failed callback cannot overwrite a successful training status.
        try:
            requests.post(f"{AION_SERVER_URL}/api/gpu/training-complete", json={
                "workerId": WORKER_ID,
                "jobId": job_id,
                "status": "completed"
            }, timeout=30)
        except Exception as e:
            print(f"Falha ao notificar o AION sobre o job {job_id}: {e}")

    Thread(target=train_async, daemon=True).start()

    return jsonify({"status": "training_started", "jobId": job_id})

def run_server():
    """Start the Flask app listening on every interface, port 5000 (blocking call)."""
    bind_host, bind_port = '0.0.0.0', 5000
    app.run(host=bind_host, port=bind_port)

## 6. Heartbeat para AION

In [None]:
def send_heartbeat(ngrok_url, interval=30, timeout=10):
    """Post a heartbeat to AION forever so it knows this worker is online.

    Args:
        ngrok_url: Public URL of this worker, sent as ``ngrokUrl``.
        interval: Seconds to sleep between heartbeats.
        timeout: Seconds to wait for each heartbeat request before giving up,
            so an unresponsive server cannot stall the loop indefinitely.

    This function never returns; run it in a daemon thread.
    """
    while True:
        try:
            requests.post(f"{AION_SERVER_URL}/api/gpu/heartbeat", json={
                "workerId": WORKER_ID,
                "provider": PROVIDER,
                "ngrokUrl": ngrok_url,
                "status": "online",
                "capabilities": {
                    "model": "llama-2-7b-lora",
                    "gpu": "T4",
                    "vram_gb": 16,
                    "max_concurrent": 1
                }
            }, timeout=timeout)
        except Exception as e:
            # Heartbeats are best-effort: report the failure and retry on the
            # next tick. Unlike a bare except, this lets SystemExit and
            # KeyboardInterrupt propagate.
            print(f"Heartbeat falhou: {e}")

        time.sleep(interval)  # every 30 seconds by default

## 7. INICIAR WORKER 🚀

In [None]:
# Start the Flask server in the background
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

time.sleep(3)  # Give the server time to start

# Open the Ngrok tunnel.
# Current pyngrok returns an NgrokTunnel object whose str() is a descriptive
# label ('NgrokTunnel: "<url>" -> "<addr>"'), not the URL itself, so read the
# URL from its public_url attribute. Older releases returned the URL string
# directly, which the getattr fallback keeps working.
tunnel = ngrok.connect(5000)
public_url = getattr(tunnel, "public_url", tunnel)
print(f"\n🌐 API pública: {public_url}")
print(f"🔗 Health check: {public_url}/health")

# Register with AION
response = requests.post(f"{AION_SERVER_URL}/api/gpu/register", json={
    "workerId": WORKER_ID,
    "provider": PROVIDER,
    "ngrokUrl": str(public_url),
    "capabilities": {
        "model": "llama-2-7b-lora",
        "gpu": "T4",
        "vram_gb": 16
    }
}, timeout=30)
# Stop here on an error status instead of announcing a registration that failed.
response.raise_for_status()

print(f"\n✅ Worker registrado no AION!")
try:
    print(f"Response: {response.json()}")
except ValueError:
    # A non-JSON success body must not prevent the heartbeat from starting.
    print(f"Response: {response.text}")

# Start the heartbeat
heartbeat_thread = Thread(target=send_heartbeat, args=(str(public_url),), daemon=True)
heartbeat_thread.start()

print("\n💚 WORKER ATIVO - Aguardando jobs de treino...")
print("Copie a URL do Ngrok e use no AION Admin Dashboard")

## 8. Keep-Alive (Manter Colab Ativo)

**Abra o Console do Browser (Ctrl+Shift+I) e execute:**

```javascript
// Clicks the Colab connect button so the notebook session is kept active.
function ClickConnect() {
  console.log('AION Keep-Alive Active');
  // The optional chaining skips the click when the button element or the
  // "#connect" node inside its shadow root is not found.
  document.querySelector("colab-connect-button")?.shadowRoot.querySelector("#connect")?.click();
}
// Repeat the click every 60 seconds.
setInterval(ClickConnect, 60000);
```

In [None]:
# Keep this cell running to avoid the idle timeout.
# The loop never exits; interrupt the cell to stop it.
while True:
    time.sleep(600)  # 10 minutes
    print(f"‚è∞ Worker ainda ativo - {time.strftime('%H:%M:%S')}")