In [5]:
!pip install transformers datasets
!pip install torch



In [13]:
from transformers import BertTokenizerFast, BertForSequenceClassification, TrainingArguments, Trainer, EvalPrediction
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from google.colab import drive
import torch

# Checkpoint shared by the tokenizer and the model so the two cannot drift apart.
MODEL_CHECKPOINT = "neuralmind/bert-base-portuguese-cased"
# Location of the test split on the mounted Google Drive.
TEST_CSV_PATH = "/content/drive/MyDrive/Colab Notebooks/df_tcu_teste.csv"
# Number of output classes of the classification head.
NUM_LABELS = 4

# Load the data: mount Google Drive, then read the CSV as the "test" split.
drive.mount('/content/drive')
dataset = load_dataset('csv', data_files={'test': TEST_CSV_PATH})

# BERTimbau tokenizer and model.
# NOTE(review): this loads the base pre-trained checkpoint, so the classification head
# ('classifier.weight' / 'classifier.bias') is newly initialized, as the warning printed
# by this cell says. Metrics computed with this model reflect an untrained head; point
# MODEL_CHECKPOINT at the fine-tuned checkpoint if one exists — TODO confirm.
tokenizer = BertTokenizerFast.from_pretrained(MODEL_CHECKPOINT)
model = BertForSequenceClassification.from_pretrained(MODEL_CHECKPOINT, num_labels=NUM_LABELS)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at neuralmind/bert-base-portuguese-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
# Maps each class name to its integer label id; ids follow the order of the names below.
label_map_test = {
    class_name: class_id
    for class_id, class_name in enumerate(
        ("Licitação", "Pessoal", "Responsabilidade", "Direito Processual")
    )
}

def preprocess_testing_data(examples):
    """Tokenize a batch of test examples and attach integer class labels.

    Written to be used with ``Dataset.map(..., batched=True)``.

    Args:
        examples: Batch mapping column names to lists of values. The ``"VOTO"``
            column is tokenized and the ``"AREA"`` column holds the class names.

    Returns:
        The tokenizer output (padded and truncated to 512 tokens) with an extra
        ``"label"`` entry holding the integer id of each example's class.

    Raises:
        KeyError: If an ``"AREA"`` value is not a key of ``label_map_test``.
    """
    areas = examples["AREA"]

    # Fail with the full list of offending values instead of a bare KeyError on the first one.
    unknown_areas = sorted({str(area) for area in areas if area not in label_map_test})
    if unknown_areas:
        raise KeyError(
            f"Unknown AREA value(s) {unknown_areas}; expected one of {list(label_map_test)}"
        )

    encoded = tokenizer(examples["VOTO"], padding="max_length", truncation=True, max_length=512)
    # Return the labels as part of the output rather than mutating the input batch:
    # the columns returned by the mapped function are what `map` adds to the dataset.
    encoded["label"] = [label_map_test[area] for area in areas]
    return encoded

# Pre-process the test split and store the result under the "test" key.
encoded_dataset = {"test": dataset["test"].map(preprocess_testing_data, batched=True)}

# Metric computation helper.
def compute_metrics(predictions, labels):
    """Compute accuracy and weighted F1, precision and recall.

    Args:
        predictions: Array of per-class scores with the classes on the last
            axis; the predicted class is the argmax over that axis.
        labels: True class ids, one per row of ``predictions``.

    Returns:
        Dict with the keys ``"accuracy"``, ``"f1"``, ``"precision"`` and
        ``"recall"``.
    """
    preds = predictions.argmax(axis=-1)  # predicted class per example
    acc = accuracy_score(labels, preds)
    # zero_division=0 on all three scores: a class that is never predicted (or never
    # present) contributes 0 without emitting an undefined-metric warning.
    f1 = f1_score(labels, preds, average="weighted", zero_division=0)
    precision = precision_score(labels, preds, average="weighted", zero_division=0)
    recall = recall_score(labels, preds, average="weighted", zero_division=0)
    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

Map:   0%|          | 0/56 [00:00<?, ? examples/s]

In [15]:
# Evaluation on the test set.
EVAL_BATCH_SIZE = 8  # examples per forward pass; lower it if memory is tight

# Run on the GPU when the runtime has one; the inputs are created on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference mode (e.g. dropout disabled)

test_split = encoded_dataset["test"]
all_predictions = []
all_labels = []

with torch.no_grad():  # no gradients are needed for evaluation
    for start in range(0, len(test_split), EVAL_BATCH_SIZE):
        # Slicing the dataset yields a dict mapping each column to a list of values.
        # Every sequence was padded to the same length during pre-processing, so the
        # lists can be turned into rectangular tensors directly.
        batch = test_split[start:start + EVAL_BATCH_SIZE]
        inputs = {
            "input_ids": torch.tensor(batch["input_ids"], device=device),
            "attention_mask": torch.tensor(batch["attention_mask"], device=device),
        }
        # Forward pass; keep the raw logits and the matching labels.
        logits = model(**inputs).logits
        all_predictions.append(logits.cpu().numpy())
        all_labels.extend(batch["label"])

# Stack the per-batch logits into a single array and compute the metrics.
all_predictions = np.vstack(all_predictions)
all_labels = np.array(all_labels)
metrics = compute_metrics(all_predictions, all_labels)

# Show the results.
print(f"Accuracy: {metrics['accuracy']}")
print(f"F1 Score: {metrics['f1']}")
print(f"Precision: {metrics['precision']}")
print(f"Recall: {metrics['recall']}")

Accuracy: 0.19642857142857142
F1 Score: 0.15580357142857146
Precision: 0.14161433804290946
Recall: 0.19642857142857142
