# SPR 2026 - BERTimbau (BERT Português)

**Fine-tuning BERTimbau-base para classificação**

- ✅ Hugging Face Trainer API
- ✅ F1-Macro como métrica
- ✅ Tempo esperado: ~15-20 min

---
**CONFIGURAÇÃO KAGGLE:**
1. Settings → Internet → **OFF**
2. Settings → Accelerator → **GPU T4 x2**
3. Add Data → Datasets → Buscar: `bertimbau`
   - Selecionar: **BERTimbau_Portuguese** (Fernandosr85)
4. **IMPORTANTE:** Execute "Run All" após commit
---

In [None]:
# ===== SPR 2026 - BERTIMBAU (CONSOLIDATED) =====

# ==== SETUP AND IMPORTS ====
# Progress banner: step 1 of 6 (environment setup).
print("[1/6] Configurando ambiente...")
import os
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
import warnings
warnings.filterwarnings('ignore')

# Random seed handed to the NumPy and PyTorch generators.
SEED = 42
# Competition data directory under the Kaggle input mount.
DATA_DIR = '/kaggle/input/spr-2026-mammography-report-classification'

# Candidate locations for the model (Kaggle datasets), in priority order.
# The first one that exists on disk is used.
MODEL_PATHS = [
    '/kaggle/input/bertimbau-portuguese',                    # Fernandosr85
    '/kaggle/input/bertimbau-portuguese-cased',
    '/kaggle/input/bert-base-portuguese-cased',              # own upload
    '/kaggle/input/bertimbau_portuguese',
]

# Resolve MODEL_PATH to the first existing candidate; stays None if none exists.
MODEL_PATH = None
for path in MODEL_PATHS:
    if os.path.exists(path):
        MODEL_PATH = path
        break

if MODEL_PATH is None:
    print("\n⚠️ Modelo não encontrado. Datasets disponíveis:")
    # The input mount itself may be absent (e.g. when running outside Kaggle).
    # Listing it unguarded would raise a generic FileNotFoundError from
    # os.listdir and hide the actionable message raised below.
    input_root = '/kaggle/input'
    if os.path.isdir(input_root):
        # Sorted so the diagnostic output is stable between runs.
        for item in sorted(os.listdir(input_root)):
            print(f"  - {item}")
    else:
        print(f"  (diretório {input_root} não existe)")
    raise FileNotFoundError("Adicione o dataset 'BERTimbau_Portuguese' (Fernandosr85) ao notebook!")

# ---- Fine-tuning hyperparameters ----
MAX_LENGTH = 512   # presumably the max token length per input; confirm at the tokenizer call
BATCH_SIZE = 8     # samples per batch
EPOCHS = 3         # number of training epochs
LR = 2e-5          # learning rate

# ---- Device selection: use CUDA when PyTorch can see a GPU, else CPU ----
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Report the chosen device and the resolved model directory, one per line.
print(f'Device: {device}', f'Model: {MODEL_PATH}', sep='\n')

# ---- Reproducibility: seed the NumPy and PyTorch generators ----
np.random.seed(SEED)
torch.manual_seed(SEED)