<a href="https://colab.research.google.com/github/daycardoso/bert-vs-modernbert-valueeval24/blob/main/valores_bert_final1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Inicialização

In [1]:
from google.colab import drive
drive.mount('/content/drive')

from google.colab import userdata
import os
os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')
os.environ['WANDB_API_KEY'] = userdata.get('WANDB_API_KEY')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import json
import os # Adicionado para construir os caminhos

# Carregar os Dados em Ingles
home = "/content/drive/MyDrive/Mestrado/DetectionOfHumanValuesInTexts/Colab_Experimentos/"

folder_treino = home + "training-english/"
folder_validacao = home + "validation-english/"
folder_teste = home + "test-english/"

# Json value-categories - Carregue o JSON uma única vez
caminho_json_valores = os.path.join(home, "value-categories.json")
with open(caminho_json_valores, 'r') as f:
    categorias_valores = json.load(f)

# Json value-categories
categorias_valores = json.load(open(home + "value-categories.json"))


# Aplicando o pre-processamento de adição de contexto do Hierocles of Alexandria at Touché

# Treinamento dos modelos para cada direction

In [3]:
!pip install "numpy<2.0"



In [4]:
import os
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [5]:
import os
import gc
import random
import numpy as np
import pandas as pd
import torch
import datasets
import wandb
from scipy.special import expit as sigmoid
from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
from tqdm.auto import tqdm
from transformers import (
    AutoTokenizer,
    AutoConfig,
    BertForSequenceClassification,
    TrainingArguments,
    Trainer,
    EarlyStoppingCallback,
    DataCollatorWithPadding
)

# Configuração para reduzir fragmentação de memória
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

# --- 1. Inicialização e Configuração Global ---

try:
    wandb.init(
        project='touche_multi_head',
        name='bert_seq_class_19_values_no_context_10epochs'
    )
except Exception as e:
    print(f"W&B initialization failed: {e}. Running without logging.")

VALORES = [
    'Self-direction: thought', 'Self-direction: action', 'Stimulation', 'Hedonism',
    'Achievement', 'Power: dominance', 'Power: resources', 'Face',
    'Security: personal', 'Security: societal', 'Tradition', 'Conformity: rules',
    'Conformity: interpersonal', 'Humility', 'Benevolence: caring',
    'Benevolence: dependability', 'Universalism: concern', 'Universalism: nature',
    'Universalism: tolerance'
]
NUM_LABELS = len(VALORES)
ID2LABEL = {i: l for i, l in enumerate(VALORES)}
LABEL2ID = {l: i for i, l in enumerate(VALORES)}

PRETRAINED_MODEL = 'google-bert/bert-base-uncased'
MAX_LENGTH = 512

# --- 2. Preparação do Tokenizador ---

tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL)

# --- 3. Funções de Processamento de Dados ---

def preprocess_function(examples):
    """Tokeniza os textos do dataset."""
    return tokenizer(examples['Text'], padding='max_length', truncation=True, max_length=MAX_LENGTH)

def load_and_process_dataset(directory, tokenizer_instance):
    """Carrega, mescla, processa e tokeniza o dataset."""
    sentences_file_path = os.path.join(directory, 'final_sentences.tsv')
    labels_file_path = os.path.join(directory, 'final_labels.tsv')

    if not os.path.exists(sentences_file_path) or not os.path.exists(labels_file_path):
        raise FileNotFoundError(f"Arquivos de dataset não encontrados em {directory}")

    key_column_types = {'Text-ID': str, 'Sentence-ID': str}
    try:
        data_df = pd.read_csv(
            sentences_file_path,
            sep='\t',
            dtype=key_column_types,
            usecols=['Text-ID', 'Sentence-ID', 'Text']
        )
    except ValueError as e:
        raise ValueError(f"Verifique se 'final_sentences.tsv' contém as colunas 'Text-ID', 'Sentence-ID' e 'Text'. Erro: {e}")

    labels_df = pd.read_csv(labels_file_path, sep='\t', dtype=key_column_types)
    merged_df = pd.merge(data_df, labels_df, on=['Text-ID', 'Sentence-ID'])

    labels_matrix = merged_df[VALORES].values.astype(np.float32)
    merged_df['labels'] = [row.astype(np.float32) for row in labels_matrix]

    # Exemplo aleatório para inspeção
    random_idx = random.randint(0, len(merged_df) - 1)
    sample_info = {
        'Text-ID': merged_df['Text-ID'][random_idx],
        'Sentence-ID': merged_df['Sentence-ID'][random_idx],
        'Text': merged_df['Text'][random_idx],
        'labels': [ID2LABEL[i] for i, label in enumerate(merged_df['labels'][random_idx]) if label == 1] or 'Nenhum'
    }

    print("\n" + "="*35)
    print(f"=== Exemplo de Texto Pré-processado ({os.path.basename(directory)}) ===")
    print(f"Text-ID: {sample_info['Text-ID']}")
    print(f"Sentence-ID: {sample_info['Sentence-ID']}")
    print(f"Texto: {sample_info['Text']}")
    print(f"Rótulos: {sample_info['labels']}")
    print("="*35 + "\n")

    dataset = datasets.Dataset.from_pandas(merged_df)
    dataset = dataset.map(preprocess_function, batched=True, load_from_cache_file=False)
    valid_cols = ['input_ids', 'attention_mask', 'labels']
    dataset = dataset.remove_columns([c for c in dataset.column_names if c not in valid_cols])
    dataset.set_format("torch")
    return dataset

# --- 4. Função de Métricas ---

def compute_metrics(eval_pred):
    """Calcula métricas para avaliação multi-label."""
    logits, true_labels = eval_pred
    probs = sigmoid(logits)
    preds = (probs > 0.5).astype(int)

    f1_macro = f1_score(true_labels, preds, average='macro', zero_division=0)
    f1_micro = f1_score(true_labels, preds, average='micro', zero_division=0)
    precision_macro = precision_score(true_labels, preds, average='macro', zero_division=0)
    recall_macro = recall_score(true_labels, preds, average='macro', zero_division=0)
    subset_accuracy = (true_labels == preds).all(axis=1).mean()

    auc_scores = []
    for i in range(NUM_LABELS):
        if len(np.unique(true_labels[:, i])) > 1:
            auc_scores.append(roc_auc_score(true_labels[:, i], probs[:, i]))
    roc_auc = np.mean(auc_scores) if auc_scores else float('nan')

    return {
        'subset_accuracy': subset_accuracy,
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
        'precision_macro': precision_macro,
        'recall_macro': recall_macro,
        'roc_auc': roc_auc
    }



[34m[1mwandb[0m: Currently logged in as: [33mday-cardoso[0m ([33mday-cardoso-ufrgs-universidade-federal-do-rio-grande-do-sul[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [7]:
# --- 5. Lógica Principal de Treinamento ---

def main():
    """Função principal que executa o pipeline."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'Usando dispositivo: {device}')

    print('Carregando e processando datasets...')
    train_ds = load_and_process_dataset(folder_treino, tokenizer)
    val_ds = load_and_process_dataset(folder_validacao, tokenizer)
    test_ds = load_and_process_dataset(folder_teste, tokenizer)
    print('Datasets prontos.')

    config = AutoConfig.from_pretrained(
        PRETRAINED_MODEL,
        num_labels=NUM_LABELS,
        id2label=ID2LABEL,
        label2id=LABEL2ID,
        problem_type='multi_label_classification'
    )
    model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL, config=config)

    training_args = TrainingArguments(
        output_dir='bert-seq-class-values-no-context',
        report_to='wandb',
        eval_strategy='steps',
        eval_steps=767,
        save_strategy='steps',
        save_steps=767,
        save_total_limit=2,
        learning_rate=5e-5,
        adam_epsilon=1e-8,
        num_train_epochs=20,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=4,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model='f1_macro',
        greater_is_better=True,
        fp16=True if device == 'cuda' else False,
        lr_scheduler_type='linear',
        warmup_ratio=0.1,
        seed=2025,
        overwrite_output_dir=True,
        push_to_hub=True,
        hub_model_id='DayCardoso/bert-seq-class-values-no-context',
    )

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=val_ds,
        data_collator=data_collator,
        compute_metrics=compute_metrics,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)],
    )

    print('Iniciando treinamento...')
    torch.cuda.empty_cache()  # Liberar memória antes do treinamento
    train_res = trainer.train()

    metrics = train_res.metrics
    metrics['train_samples'] = len(train_ds)
    trainer.log_metrics('train', metrics)
    trainer.save_model()
    trainer.save_state()
    trainer.save_metrics('train', metrics)

    print('\nAvaliação final no dataset de validação...')
    torch.cuda.empty_cache()  # Liberar memória antes da avaliação
    eval_res = trainer.evaluate()
    eval_res['eval_samples'] = len(val_ds)
    trainer.log_metrics('eval', eval_res)
    trainer.save_metrics('eval', eval_res)

    print('\nAvaliação no dataset de teste...')
    torch.cuda.empty_cache()  # Liberar memória antes do teste
    test_res = trainer.evaluate(eval_dataset=test_ds, metric_key_prefix='test')
    test_res['test_samples'] = len(test_ds)
    trainer.log_metrics('test', test_res)
    trainer.save_metrics('test', test_res)
    print('Teste concluído! Métricas:', test_res)

    wandb.finish()

if __name__ == '__main__':

    main()

Usando dispositivo: cuda
Carregando e processando datasets...

=== Exemplo de Texto Pré-processado () ===
Text-ID: IT_M_005
Sentence-ID: 13
Texto: In addition, the mechanisms provided in other parts of the program (see minimum exempt and negative tax) further ensure the raising of disposable income for the working poor.
Rótulos: ['Universalism: concern']



Map:   0%|          | 0/24534 [00:00<?, ? examples/s]


=== Exemplo de Texto Pré-processado () ===
Text-ID: EN_378
Sentence-ID: 5
Texto: Neither backed him publicly, although Kyrgios has made sniping comments about Hewitt on social media.
Rótulos: ['Conformity: interpersonal']



Map:   0%|          | 0/8099 [00:00<?, ? examples/s]


=== Exemplo de Texto Pré-processado () ===
Text-ID: IT_057
Sentence-ID: 16
Texto: To wage war is to destroy, to have more power." "It is a nonsense of Creation.
Rótulos: ['Security: personal']



Map:   0%|          | 0/7865 [00:00<?, ? examples/s]

Datasets prontos.


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Iniciando treinamento...


Step,Training Loss,Validation Loss,Subset Accuracy,F1 Macro,F1 Micro,Precision Macro,Recall Macro,Roc Auc
767,0.4117,0.211202,0.0,0.0,0.0,0.0,0.0,0.63811
1534,0.1905,0.179161,0.069638,0.047547,0.127488,0.18029,0.03026,0.775041
2301,0.1716,0.168705,0.153229,0.12517,0.2519,0.302705,0.096177,0.804841
3068,0.1619,0.16441,0.198913,0.177066,0.301865,0.356146,0.134131,0.826688
3835,0.1395,0.166917,0.25534,0.240707,0.364912,0.467191,0.191744,0.828292
4602,0.1334,0.163429,0.250525,0.258216,0.354525,0.455675,0.20065,0.835208
5369,0.1032,0.18028,0.304112,0.309283,0.393791,0.405279,0.261491,0.826151
6136,0.0958,0.182628,0.312878,0.301262,0.398871,0.411717,0.262769,0.827723
6903,0.0733,0.207374,0.316088,0.313969,0.400096,0.396962,0.283091,0.814911
7670,0.0655,0.209747,0.309791,0.319455,0.396673,0.394722,0.282386,0.815214


***** train metrics *****
  epoch                    =        17.0
  total_flos               = 102216920GF
  train_loss               =       0.053
  train_runtime            =  1:35:19.38
  train_samples            =       24534
  train_samples_per_second =      85.792
  train_steps_per_second   =       5.364

Avaliação final no dataset de validação...


***** eval metrics *****
  epoch                   =       17.0
  eval_f1_macro           =     0.3465
  eval_f1_micro           =     0.4098
  eval_loss               =     0.3717
  eval_precision_macro    =     0.3655
  eval_recall_macro       =     0.3327
  eval_roc_auc            =     0.7904
  eval_runtime            = 0:00:30.64
  eval_samples            =       8099
  eval_samples_per_second =    264.282
  eval_steps_per_second   =     66.079
  eval_subset_accuracy    =     0.3023

Avaliação no dataset de teste...


early stopping required metric_for_best_model, but did not find eval_f1_macro so early stopping is disabled


***** test metrics *****
  epoch                   =       17.0
  test_f1_macro           =     0.3315
  test_f1_micro           =     0.4022
  test_loss               =     0.3796
  test_precision_macro    =     0.3526
  test_recall_macro       =     0.3192
  test_roc_auc            =      0.785
  test_runtime            = 0:00:31.41
  test_samples            =       7865
  test_samples_per_second =    250.346
  test_steps_per_second   =      62.61
  test_subset_accuracy    =      0.294
Teste concluído! Métricas: {'test_loss': 0.37962791323661804, 'test_subset_accuracy': 0.2939605848696758, 'test_f1_macro': 0.33148461030006626, 'test_f1_micro': 0.4022210654173174, 'test_precision_macro': 0.3525651663916523, 'test_recall_macro': 0.31915286502575685, 'test_roc_auc': 0.7850314242292196, 'test_runtime': 31.4165, 'test_samples_per_second': 250.346, 'test_steps_per_second': 62.61, 'epoch': 17.0, 'test_samples': 7865}


0,1
eval/f1_macro,▁▃▁▂▄▅▆▆▇▇▇▇█████████████████████████
eval/f1_micro,▁▅▁▃▅▆▇▇█████████████████████████████
eval/loss,▂▁▂▁▁▁▁▁▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇████▇
eval/precision_macro,▁▆▁▄▆▆██▇▇▇▇▇▆▇▆▆▇▇▇▆▆▆▆▆▇▆▆▇▆▆▇▇▇▆▆▆
eval/recall_macro,▁▃▁▂▃▄▅▅▆▆▇▇▇▇▇▇▇▇▇▇█▇███▇▇▇███▇▇▇███
eval/roc_auc,▂▆▁▆▇█████▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▆▆▇▆▆▆▆▆▇▆▆
eval/runtime,▁▃▄▄▄▇▇▆▃▃▃▃▆▃▃▆▅▃▃▅▄▅▆▅▅▄▅▇▆▄▅▅▆▄▆▇█
eval/samples_per_second,█▆▅▅▅▂▂▃▅▅▆▆▃▆▅▃▄▆▆▄▅▄▃▄▄▅▄▂▃▄▄▄▃▅▃▂▁
eval/steps_per_second,█▆▅▅▅▂▂▃▅▅▆▆▃▆▅▃▄▆▆▄▅▄▃▄▄▅▄▂▃▄▄▄▃▅▃▂▁
eval/subset_accuracy,▁▄▁▃▄▅▇▇██████████▇████▇▇▇▇▇█████████

0,1
eval/f1_macro,0.34646
eval/f1_micro,0.40977
eval/loss,0.37166
eval/precision_macro,0.36553
eval/recall_macro,0.33274
eval/roc_auc,0.79041
eval/runtime,30.6453
eval/samples_per_second,264.282
eval/steps_per_second,66.079
eval/subset_accuracy,0.30226


In [None]:
# # -*- coding: utf-8 -*-
# """
# Script de treinamento para usar BertForSequenceClassification.
# """

# import os
# import random
# import numpy as np
# import pandas as pd
# import torch
# import datasets
# import wandb
# from scipy.special import expit as sigmoid
# from sklearn.metrics import f1_score, precision_score, recall_score, roc_auc_score
# from tqdm.auto import tqdm
# from transformers import (
#     AutoTokenizer,
#     AutoConfig,
#     BertForSequenceClassification,
#     TrainingArguments,
#     Trainer,
#     EarlyStoppingCallback,
#     DataCollatorWithPadding
# )

# # --- 1. Inicialização e Configuração Global ---

# # Inicializa Weights & Biases para monitoramento
# try:
#     wandb.init(
#         project='touche_multi_head',
#         name='bert_seq_class_19_values_context_refactored'  # Nome alterado para refletir o modelo
#     )
# except Exception as e:
#     print(f"W&B initialization failed: {e}. Running without logging.")

# # Definição dos valores (labels) e tokens especiais
# VALORES = [
#     'Self-direction: thought', 'Self-direction: action', 'Stimulation', 'Hedonism',
#     'Achievement', 'Power: dominance', 'Power: resources', 'Face',
#     'Security: personal', 'Security: societal', 'Tradition', 'Conformity: rules',
#     'Conformity: interpersonal', 'Humility', 'Benevolence: caring',
#     'Benevolence: dependability', 'Universalism: concern', 'Universalism: nature',
#     'Universalism: tolerance'
# ]
# SPECIAL_TOKENS = ['<NONE>'] + [f'<{valor}>' for valor in VALORES]
# NUM_LABELS = len(VALORES)
# ID2LABEL = {i: l for i, l in enumerate(VALORES)}
# LABEL2ID = {l: i for i, l in enumerate(VALORES)}

# PRETRAINED_MODEL = 'google-bert/bert-base-uncased'  # <-- Alterado para BERT padrão
# MAX_LENGTH = 512

# # --- 2. Preparação do Tokenizador ---

# tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL)
# tokenizer.add_special_tokens({'additional_special_tokens': SPECIAL_TOKENS + ['</s>']})

# # --- 3. Funções de Processamento de Dados ---

# def preprocess_function(examples):
#     """Tokeniza os textos do dataset."""
#     return tokenizer(examples['Text'], padding='max_length', truncation=True, max_length=MAX_LENGTH)

# def _add_context_to_text(data_df):
#     """
#     Adiciona sentenças anteriores e seus rótulos como contexto.
#     Esta é uma função auxiliar para manter a lógica de negócio isolada.
#     """
#     data_df = data_df.sort_values(['Text-ID', 'Sentence-ID']).reset_index(drop=True)
#     new_texts = []

#     # Armazena um exemplo aleatório para verificação
#     random_idx = random.randint(0, len(data_df) - 1)
#     sample_info = {}

#     for i in tqdm(range(len(data_df)), desc="Adicionando contexto"):
#         current_text = data_df['Text'][i]
#         context_parts = []

#         # Itera sobre as duas sentenças anteriores, se existirem
#         for j in range(1, 3):
#             prev_idx = i - j
#             if prev_idx >= 0 and data_df['Text-ID'][prev_idx] == data_df['Text-ID'][i]:
#                 prev_text = data_df['Text'][prev_idx]
#                 prev_labels = data_df['labels'][prev_idx]
#                 active_labels = [ID2LABEL[k] for k, label in enumerate(prev_labels) if label == 1]
#                 context_text = prev_text + ' ' + (' '.join([f'<{l}>' for l in active_labels]) if active_labels else '<NONE>')
#                 context_parts.insert(0, context_text)

#         processed_text = ' </s> '.join(context_parts) + ' </s> ' + current_text if context_parts else current_text
#         new_texts.append(processed_text)

#         if i == random_idx:
#             sample_info = {
#                 'Text-ID': data_df['Text-ID'][i],
#                 'Sentence-ID': data_df['Sentence-ID'][i],
#                 'original_text': current_text,
#                 'processed_text': processed_text,
#                 'labels': data_df['labels'][i]
#             }

#     data_df['Text'] = new_texts
#     return data_df, sample_info

# def load_and_process_dataset(directory, tokenizer_instance):
#     """
#     Carrega, mescla, processa e tokeniza o dataset de um diretório.
#     """
#     sentences_file_path = os.path.join(directory, 'final_sentences.tsv')
#     labels_file_path = os.path.join(directory, 'final_labels.tsv')

#     if not os.path.exists(sentences_file_path) or not os.path.exists(labels_file_path):
#         raise FileNotFoundError(f"Arquivos de dataset não encontrados em {directory}")

#     key_column_types = {'Text-ID': str, 'Sentence-ID': str}
#     try:
#         data_df = pd.read_csv(
#             sentences_file_path,
#             sep='\t',
#             dtype=key_column_types,
#             usecols=['Text-ID', 'Sentence-ID', 'Text']
#         )
#     except ValueError as e:
#         raise ValueError(f"Verifique se 'final_sentences.tsv' contém as colunas 'Text-ID', 'Sentence-ID' e 'Text'. Erro: {e}")

#     labels_df = pd.read_csv(labels_file_path, sep='\t', dtype=key_column_types)
#     merged_df = pd.merge(data_df, labels_df, on=['Text-ID', 'Sentence-ID'])

#     labels_matrix = merged_df[VALORES].values.astype(np.float32)
#     merged_df['labels'] = [row.astype(np.float32) for row in labels_matrix]

#     processed_df, sample = _add_context_to_text(merged_df)

#     print("\n" + "="*35)
#     print(f"=== Exemplo de Texto Pré-processado ({os.path.basename(directory)}) ===")
#     print(f"Text-ID: {sample['Text-ID']}")
#     print(f"Sentence-ID: {sample['Sentence-ID']}")
#     print(f"Texto Original: {sample['original_text']}")
#     print(f"Texto com Contexto: {sample['processed_text']}")
#     active_labels = [ID2LABEL[i] for i, label in enumerate(sample['labels'].tolist()) if label == 1]
#     print(f"Rótulos: {active_labels if active_labels else 'Nenhum'}")
#     print("="*35 + "\n")

#     dataset = datasets.Dataset.from_pandas(processed_df)
#     dataset = dataset.map(preprocess_function, batched=True, load_from_cache_file=False)
#     valid_cols = ['input_ids', 'attention_mask', 'labels']
#     dataset = dataset.remove_columns([c for c in dataset.column_names if c not in valid_cols])
#     dataset.set_format("torch")

#     return dataset

# # --- 4. Função de Métricas ---

# def compute_metrics(p):
#     """Calcula e retorna um dicionário de métricas de classificação multi-label."""
#     logits = p.predictions
#     true_labels = p.label_ids
#     probs = sigmoid(logits)
#     preds = (probs > 0.5).astype(int)

#     f1_macro = f1_score(true_labels, preds, average='macro', zero_division=0)
#     f1_micro = f1_score(true_labels, preds, average='micro', zero_division=0)
#     precision_macro = precision_score(true_labels, preds, average='macro', zero_division=0)
#     recall_macro = recall_score(true_labels, preds, average='macro', zero_division=0)
#     subset_accuracy = (true_labels == preds).all(axis=1).mean()

#     auc_scores = []
#     for i in range(NUM_LABELS):
#         if len(np.unique(true_labels[:, i])) > 1:
#             auc_scores.append(roc_auc_score(true_labels[:, i], probs[:, i]))
#     roc_auc = np.mean(auc_scores) if auc_scores else float('nan')

#     return {
#         'subset_accuracy': subset_accuracy,
#         'f1_macro': f1_macro,
#         'f1_micro': f1_micro,
#         'precision_macro': precision_macro,
#         'recall_macro': recall_macro,
#         'roc_auc': roc_auc
#     }

# # --- 5. Lógica Principal de Treinamento ---

# def main():
#     """Função principal que executa todo o pipeline."""

#     print('Carregando e processando datasets...')
#     train_ds = load_and_process_dataset(folder_treino, tokenizer)
#     val_ds = load_and_process_dataset(folder_validacao, tokenizer)
#     print('Datasets prontos.')

#     # Configuração de treinamento
#     training_args = TrainingArguments(
#         output_dir='bert-base-seq-class-values-context',
#         report_to='wandb',
#         eval_strategy='steps',
#         eval_steps=767,
#         save_strategy='steps',
#         save_steps=767,
#         save_total_limit=2,
#         learning_rate=5e-6,
#         adam_epsilon=1e-8,
#         num_train_epochs=33,
#         per_device_train_batch_size=2,
#         per_device_eval_batch_size=2,
#         gradient_accumulation_steps=8,
#         weight_decay=0.01,
#         load_best_model_at_end=True,
#         metric_for_best_model='f1_macro',
#         greater_is_better=True,
#         fp16=True,
#         lr_scheduler_type='linear',
#         warmup_ratio=0.01,
#         seed=2025,
#         overwrite_output_dir=True,
#         push_to_hub=True,
#         hub_model_id='DayCardoso/bert-base-multi-head-values-context',
#     )

#     print('Inicializando modelo...')
#     config = AutoConfig.from_pretrained(
#         PRETRAINED_MODEL,
#         num_labels=NUM_LABELS,
#         id2label=ID2LABEL,
#         label2id=LABEL2ID,
#         problem_type='multi_label_classification',
#     )

#     model = BertForSequenceClassification.from_pretrained(
#         PRETRAINED_MODEL,
#         config=config,
#     )

#     model.resize_token_embeddings(len(tokenizer))

#     data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

#     trainer = Trainer(
#         model=model,
#         args=training_args,
#         train_dataset=train_ds,
#         eval_dataset=val_ds,
#         compute_metrics=compute_metrics,
#         data_collator=data_collator,
#         callbacks=[EarlyStoppingCallback(early_stopping_patience=9)],
#     )

#     print('Iniciando treinamento...')
#     train_res = trainer.train()

#     metrics = train_res.metrics
#     metrics['train_samples'] = len(train_ds)
#     trainer.log_metrics('train', metrics)
#     trainer.save_model()
#     trainer.save_state()
#     trainer.save_metrics('train', metrics)

#     print('\nAvaliação final no dataset de validação...')
#     eval_res = trainer.evaluate()
#     eval_res['eval_samples'] = len(val_ds)
#     trainer.log_metrics('eval', eval_res)
#     trainer.save_metrics('eval', eval_res)
#     print('Treinamento concluído! Métricas de avaliação:', eval_res)

#     wandb.finish()

# if __name__ == '__main__':
#     main()