In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from pathlib import Path
import os
import torch.nn as nn

# Configuration
MODEL_NAME = "LLaMAX/LLaMAX3-8B"  # identifier passed to from_pretrained; the name may vary, adjust to what is available
DATA_DIR = "data"  # folder scanned for *.txt training files
BATCH_SIZE = 32  # examples per DataLoader batch
BLOCK_SIZE = 64  # tokens per training example
EPOCHS = 100  # number of full passes over the dataloader
LR = 5e-5  # learning rate given to the AdamW optimizer
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # use the GPU when CUDA is available
DROPOUT_RATE = 0.17  # 17%; probability handed to set_dropout

# Tokenizer and model
# Both are loaded from MODEL_NAME; the model is moved to DEVICE right after loading.
# NOTE(review): no dtype is requested here, so the checkpoint's/library's default
# precision is used — confirm the model plus optimizer state fits on DEVICE.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)

# Change the dropout probability of every dropout layer in the model
def set_dropout(module, p):
    """Set the drop probability of every ``nn.Dropout`` inside *module*.

    Layers are updated in place instead of being replaced by new
    ``nn.Dropout`` objects, so each layer keeps its ``inplace`` flag, its
    current train/eval mode and any registered hooks.

    Args:
        module: Root ``nn.Module``. The root itself and all of its
            descendants are visited.
        p: New drop probability; must lie in ``[0, 1]``.

    Raises:
        ValueError: If *p* is outside ``[0, 1]``.
    """
    # Assigning ``layer.p`` bypasses the constructor's range check, so
    # validate explicitly to keep the same failure mode as ``nn.Dropout(p)``.
    if not 0.0 <= p <= 1.0:
        raise ValueError(
            f"dropout probability has to be between 0 and 1, but got {p}"
        )
    # ``modules()`` walks the whole tree (root included) and yields each
    # module object once, even if it is registered under several names.
    for layer in module.modules():
        if isinstance(layer, nn.Dropout):
            layer.p = p

# Apply the 17% dropout rate (DROPOUT_RATE) to the model's nn.Dropout layers.
# NOTE(review): set_dropout only touches nn.Dropout submodules; if the loaded
# architecture has none (e.g. dropout is applied functionally or driven by the
# model config), this call changes nothing — verify against the model.
set_dropout(model, DROPOUT_RATE)

# Enable gradient checkpointing (memory saving)
model.gradient_checkpointing_enable()

# Custom dataset
class TxtDataset(Dataset):
    """Fixed-length token blocks cut from the ``*.txt`` files of a folder.

    Every file is read as UTF-8, tokenized in one call, and split into
    consecutive, non-overlapping blocks of ``block_size`` tokens. A trailing
    remainder shorter than ``block_size`` is discarded. Blocks never span
    two files.

    Args:
        folder: Directory searched (non-recursively) for ``*.txt`` files.
        tokenizer: Object exposing ``encode(text, add_special_tokens=True)``
            that returns a sliceable sequence of token ids.
        block_size: Number of tokens per example; must be at least 1.

    Raises:
        ValueError: If ``block_size`` is smaller than 1.
    """

    def __init__(self, folder, tokenizer, block_size=64):
        if block_size < 1:
            raise ValueError(
                f"block_size must be a positive integer, got {block_size}"
            )
        self.examples = []
        # Sorted so the example order does not depend on directory listing order.
        for file in sorted(Path(folder).glob("*.txt")):
            text = file.read_text(encoding="utf-8")
            tokens = tokenizer.encode(text, add_special_tokens=True)
            # The "+ 1" keeps the final block when len(tokens) is an exact
            # multiple of block_size; without it that block was silently lost.
            last_start = len(tokens) - block_size
            self.examples.extend(
                tokens[start:start + block_size]
                for start in range(0, last_start + 1, block_size)
            )

    def __len__(self):
        """Return the number of token blocks."""
        return len(self.examples)

    def __getitem__(self, idx):
        """Return block *idx* as ``input_ids`` plus an independent ``labels`` copy."""
        x = torch.tensor(self.examples[idx], dtype=torch.long)
        # Labels are a separate tensor so mutating one does not affect the other.
        return {"input_ids": x, "labels": x.clone()}

# Load the dataset
dataset = TxtDataset(DATA_DIR, tokenizer, BLOCK_SIZE)
# Fail early with an actionable message: with zero examples there is nothing
# to train on, and the failure would otherwise surface later as a less helpful
# error (e.g. the per-epoch average divides by len(dataloader)).
if len(dataset) == 0:
    raise ValueError(
        f"Nenhum exemplo de treino foi gerado a partir de '{DATA_DIR}'. "
        f"Verifique se a pasta contém arquivos .txt com texto suficiente "
        f"para formar blocos de {BLOCK_SIZE} tokens."
    )
# Batches are reshuffled on every pass over the loader.
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

# Training
model.train()
for epoch in range(EPOCHS):
    # Release cached GPU memory once per epoch. Doing this on every step
    # makes the caching allocator re-request memory each iteration, which
    # slows training without giving PyTorch any extra usable memory.
    torch.cuda.empty_cache()
    total_loss = 0
    for step, batch in enumerate(dataloader):
        input_ids = batch["input_ids"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)

        # Labels are supplied in the forward call; the loss is read from the output.
        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next step.
        optimizer.zero_grad()

        # .item() converts the loss tensor to a plain Python number.
        total_loss += loss.item()

    # Mean of the per-batch losses for this epoch.
    print(f"Época {epoch+1} finalizada. Loss médio: {total_loss / len(dataloader):.4f}")

# Save the fine-tuned model and its tokenizer side by side
output_dir = "modelo_finetunado"
# Create the target folder (and any missing parents); an existing one is fine.
Path(output_dir).mkdir(parents=True, exist_ok=True)
tokenizer.save_pretrained(output_dir)
model.save_pretrained(output_dir)
print(f"✅ Modelo salvo em: {output_dir}")


tokenizer_config.json:   0%|          | 0.00/50.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/851 [00:00<?, ?B/s]

2025-05-27 20:34:58.744898: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748388898.757416  132778 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748388898.760737  132778 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748388898.769938  132778 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748388898.769955  132778 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748388898.769957  132778 computation_placer.cc:177] computation placer alr

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00007-of-00007.safetensors:   0%|          | 0.00/2.57G [00:00<?, ?B/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]