In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from pathlib import Path
import os

# Configurações
# MODEL_NAME = "EleutherAI/pythia-160m"  # modelo mais leve
MODEL_NAME = "EleutherAI/pythia-1b"  # modelo mais leve
DATA_DIR = "data"
BATCH_SIZE = 32
BLOCK_SIZE = 64
EPOCHS = 100
LR = 5e-5
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizador e modelo
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).to(DEVICE)

# Ativa economia de memória
model.gradient_checkpointing_enable()

# Dataset customizado
class TxtDataset(Dataset):
    def __init__(self, folder, tokenizer, block_size=64):
        self.examples = []
        for file in Path(folder).glob("*.txt"):
            with open(file, "r", encoding="utf-8") as f:
                text = f.read()
            tokens = tokenizer.encode(text, add_special_tokens=True)
            for i in range(0, len(tokens) - block_size, block_size):
                self.examples.append(tokens[i:i + block_size])
    
    def __len__(self):
        return len(self.examples)
    
    def __getitem__(self, idx):
        x = torch.tensor(self.examples[idx])
        return {"input_ids": x, "labels": x.clone()}

# Carrega dataset
dataset = TxtDataset(DATA_DIR, tokenizer, BLOCK_SIZE)
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

# Otimizador
optimizer = torch.optim.AdamW(model.parameters(), lr=LR)

# Treinamento
model.train()
for epoch in range(EPOCHS):
    total_loss = 0
    for step, batch in enumerate(dataloader):
        torch.cuda.empty_cache()  # libera memória da GPU
        input_ids = batch["input_ids"].to(DEVICE)
        labels = batch["labels"].to(DEVICE)

        outputs = model(input_ids=input_ids, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        total_loss += loss.item()

        # if step % 10 == 0:
        #     print(f"[Época {epoch+1}] Passo {step} | Loss: {loss.item():.4f}")
    
    print(f"Época {epoch+1} finalizada. Loss médio: {total_loss / len(dataloader):.4f}")

# Salvar modelo
output_dir = "modelo_finetunado"
os.makedirs(output_dir, exist_ok=True)
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
print(f"✅ Modelo salvo em: {output_dir}")




tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/569 [00:00<?, ?B/s]

2025-05-27 20:36:53.439394: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748389013.453579  134669 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748389013.457901  134669 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748389013.469174  134669 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748389013.469195  134669 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748389013.469197  134669 computation_placer.cc:177] computation placer alr

model.safetensors:   0%|          | 0.00/2.09G [00:00<?, ?B/s]

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


OutOfMemoryError: CUDA out of memory. Tried to allocate 48.00 MiB. GPU 0 has a total capacity of 7.60 GiB of which 78.94 MiB is free. Including non-PyTorch memory, this process has 7.11 GiB memory in use. Of the allocated memory 6.87 GiB is allocated by PyTorch, and 113.60 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [2]:
from transformers import pipeline

pipe = pipeline("text-generation", model="modelo_finetunado", tokenizer="modelo_finetunado", device=0 if torch.cuda.is_available() else -1)

res = pipe("Me explique o que é um super computador ", max_length=50, do_sample=True, top_k=50)
print(res[0]['generated_text'])


Device set to use cuda:0
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Both `max_new_tokens` (=256) and `max_length`(=50) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


Me explique o que é um super computador ernstar do tradicional fluxo de elétrons, e não sua carga ela supera ela, como sua carga elétrica.\n\nEm 2023, a mesma equipe descobriu comofazer computação usando ondas magnéticas, também conhecidas comoondas de spin, cujas quasipartículas são atualmente usados para redefinir os bits magnéticos, e também com isso carga aí corre dois modosmagnônicos, também conhecidas comoondas de spin que são apenas as ondas de spin magnéticas, como ondas de spin totalmente no mesmo estado, ou aquecimento Joule, dois modosmagnônicosecs, e também com o mesmo comportamento magnético do elétrons, comoondas de spin totalmente não existem, comoondas de spin totalmente não são dispositivos usados para redefinir os bits magnéticos, comoondas de spin totalmente não existem um comportamento magnético, ou aquecs de spin totalmente não são os bits magnéticos, como são os


In [None]:
texto = "O texto completo aqui. Agora, por favor, gere um resumo:"
input = tokenizer.encode(texto, return_tensors="pt").to(DEVICE)
output = model.generate(input, max_length=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))


In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_name = "unicamp-dl/ptt5-base-portuguese-summarizer"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to("cuda")

texto = "resuma: A inteligência artificial está transformando a indústria da tecnologia. Com o uso de modelos de linguagem, muitas tarefas..."
inputs = tokenizer(texto, return_tensors="pt", truncation=True).to("cuda")
summary_ids = model.generate(inputs["input_ids"], max_length=80, min_length=20)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
