In [1]:
!pip install transformers datasets accelerate peft bitsandbytes torch

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from datasets import Dataset
from peft import get_peft_model, LoraConfig, TaskType

In [3]:
# Nome do modelo
MODEL_NAME = "tiiuae/falcon-7b"

In [4]:
# Carregar o tokenizador
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/281 [00:00<?, ?B/s]

In [5]:
tokenizer.pad_token = tokenizer.eos_token

In [6]:

# Carregar o modelo com quantização para economizar memória
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # Usa menos VRAM
    device_map="auto"  # Distribui automaticamente entre CPU e GPU
)

config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/17.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.95G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.48G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

In [7]:
data = [
    {"input": "O que é inteligência artificial?", "output": "É a capacidade das máquinas de realizar tarefas que normalmente exigiriam inteligência humana."},
    {"input": "Quem foi Alan Turing?", "output": "Alan Turing foi um matemático e cientista da computação britânico, considerado um dos pais da computação moderna."}
]

In [8]:
# Converter para Dataset Hugging Face
dataset = Dataset.from_list(data)

In [17]:
def tokenize_function(examples):
    prompt = "Pergunta: " + examples["input"] + "\nResposta: " + examples["output"]

    # Tokenizar entrada e saída juntas
    tokens = tokenizer(prompt, truncation=True, padding="max_length", max_length=512)

    # Criar labels: rótulos são os mesmos input_ids, mas ignoramos o padding (-100)
    tokens["labels"] = tokens["input_ids"].copy()
    tokens["labels"] = [
        -100 if token == tokenizer.pad_token_id else token for token in tokens["labels"]
    ]

    return tokens


In [18]:
# Tokenizar o dataset
dataset = dataset.map(tokenize_function)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [19]:
dataset

Dataset({
    features: ['input', 'output', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 2
})

In [12]:
# Configurar LoRA
lora_config = LoraConfig(
    r=8,  # Define o tamanho das matrizes auxiliares LoRA
    lora_alpha=32,  # Define a escala do ajuste LoRA
    lora_dropout=0.05,  # Adiciona dropout para evitar overfitting
    bias="none",
    task_type=TaskType.CAUSAL_LM  # Define o modelo como um "causal language model"
)

In [13]:
# Aplicar LoRA ao Falcon 7B
model = get_peft_model(model, lora_config)

In [14]:
# Exibir os parâmetros treináveis
model.print_trainable_parameters()

trainable params: 2,359,296 || all params: 6,924,080,000 || trainable%: 0.0341


In [15]:
# Configurar os hiperparâmetros do treinamento
training_args = TrainingArguments(
    output_dir="./falcon-7b-lora-finetuned",  # Onde salvar o modelo treinado
    per_device_train_batch_size=2,  # Usa batch pequeno para economizar VRAM
    gradient_accumulation_steps=4,  # Simula batch maior sem estourar a VRAM
    num_train_epochs=3,  # Número de épocas de treinamento
    learning_rate=2e-5,  # Taxa de aprendizado otimizada para LoRA (0.00002)
    logging_dir="./logs",  # Diretório de logs para análise
    logging_steps=10,  # Salvar logs a cada 10 steps
    save_strategy="epoch",  # Salvar checkpoints no final de cada época
    fp16=True,  # Usa FP16 para reduzir o consumo de VRAM
    push_to_hub=False,  # Se quiser salvar no Hugging Face, mude para True
    report_to="none"  # 🚀 Isso desativa o W&B corretamente!
)

In [20]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset
)

# Iniciar o treinamento
trainer.train()

Step,Training Loss


TrainOutput(global_step=3, training_loss=1.4106299082438152, metrics={'train_runtime': 2.265, 'train_samples_per_second': 2.649, 'train_steps_per_second': 1.324, 'total_flos': 122178556919808.0, 'train_loss': 1.4106299082438152, 'epoch': 3.0})

In [21]:
# Salvar modelo treinado
model.save_pretrained("./falcon-7b-lora-finetuned")
tokenizer.save_pretrained("./falcon-7b-lora-finetuned")

('./falcon-7b-lora-finetuned/tokenizer_config.json',
 './falcon-7b-lora-finetuned/special_tokens_map.json',
 './falcon-7b-lora-finetuned/tokenizer.json')

In [26]:
# Testar geração de texto
input_text = "Quem descobriu o Brasil?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")

In [1]:
# Gerar resposta com o modelo treinado
output = model.generate(
    input_ids,
    attention_mask=input_ids.ne(tokenizer.pad_token_id),
    max_length=500,
    temperature=1.0,
    top_p=0.9,
    repetition_penalty=1.2,  # 🚀 Penaliza palavras repetidas
    do_sample=True
)

generated_text = tokenizer.decode(output[0], skip_special_tokens=True)

print("\n🔹 Resposta Gerada:\n", generated_text)

NameError: name 'model' is not defined