In [1]:
pip install transformers datasets

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [10]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from google.colab import drive

# Montar Google Drive
drive.mount('/content/drive')

# Carregar os dados
dataset = load_dataset('csv', data_files={
                                          'train': '/content/drive/MyDrive/Colab Notebooks/df_stj_train_zsl.csv',
                                          'validation': '/content/drive/MyDrive/Colab Notebooks/df_stj_validacao_zsl.csv',
                                          'test': '/content/drive/MyDrive/Colab Notebooks/df_stj_teste.csv'
                                          })

# Tokenizador e modelo JurisBERT
tokenizer = AutoTokenizer.from_pretrained("alfaneo/jurisbert-base-portuguese-uncased")
model = AutoModelForSequenceClassification.from_pretrained("alfaneo/jurisbert-base-portuguese-uncased", num_labels=7)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alfaneo/jurisbert-base-portuguese-uncased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Label map para o treinamento (6 classes)
label_map_train = {'DIREITO ADMINISTRATIVO': 0,
                   'DIREITO CIVIL': 1,
                   'DIREITO DA CRIANÇA E DO ADOLESCENTE': 2,
                   'DIREITO DO CONSUMIDOR': 3,
                   'DIREITO EMPRESARIAL': 4,
                   'DIREITO PENAL': 5
}

# Label map para o validação (6 classes)
label_map_validation = {'DIREITO ADMINISTRATIVO': 0,
                        'DIREITO CIVIL': 1,
                        'DIREITO DA CRIANÇA E DO ADOLESCENTE': 2,
                        'DIREITO DO CONSUMIDOR': 3,
                        'DIREITO EMPRESARIAL': 4,
                        'DIREITO PENAL': 5
}

# Label map para o teste (7 classes, incluindo a classe não vista)
label_map_test = {'DIREITO ADMINISTRATIVO': 0,
                  'DIREITO CIVIL': 1,
                  'DIREITO DA CRIANÇA E DO ADOLESCENTE': 2,
                  'DIREITO DO CONSUMIDOR': 3,
                  'DIREITO EMPRESARIAL': 4,
                  'DIREITO PENAL': 5,
                  'DIREITO AMBIENTAL': 6
}

def preprocess_training_data(examples):
    examples["label"] = [label_map_train[label] for label in examples["MATERIA"]]
    return tokenizer(examples["EMENTA"], padding="max_length", truncation=True, max_length=512)

def preprocess_validation_data(examples):
    examples["label"] = [label_map_validation[label] for label in examples["MATERIA"]]
    return tokenizer(examples["EMENTA"], padding="max_length", truncation=True, max_length=512)

def preprocess_testing_data(examples):
    examples["label"] = [label_map_test[label] for label in examples["MATERIA"]]
    return tokenizer(examples["EMENTA"], padding="max_length", truncation=True, max_length=512)

encoded_dataset = {}

# Pré-processando o conjunto de treinamento
encoded_dataset["train"] = dataset["train"].map(preprocess_training_data, batched=True)

# Pré-processando o conjunto de validação
encoded_dataset["validation"] = dataset["validation"].map(preprocess_validation_data, batched=True)

# Pré-processando o conjunto de teste
encoded_dataset["test"] = dataset["test"].map(preprocess_testing_data, batched=True)

Map:   0%|          | 0/5140 [00:00<?, ? examples/s]

Map:   0%|          | 0/1108 [00:00<?, ? examples/s]

Map:   0%|          | 0/1111 [00:00<?, ? examples/s]

In [16]:
# Argumentos de treinamento
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
    report_to="none",
)

# Função de métricas
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted")
    precision = precision_score(labels, preds, average="weighted", zero_division=0)
    recall = recall_score(labels, preds, average="weighted", zero_division=0)

     # Calcular a acurácia harmônica
    acc_seen = accuracy_score(labels[labels != -100], preds[labels != -100])  # Acurácia das classes vistas
    acc_unseen = accuracy_score(labels[labels == -100], preds[labels == -100])  # Acurácia das classes não vistas
    harmonic_mean = 2 * (acc_seen * acc_unseen) / (acc_seen + acc_unseen) if (acc_seen + acc_unseen) > 0 else 0

    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall, "harmonic_mean": harmonic_mean}

In [17]:
# Configuração do treinador
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    compute_metrics=compute_metrics
)

# Treinar o modelo
trainer.train()

# Avaliar o modelo
eval_results = trainer.evaluate()
print(f"Accuracy: {eval_results['eval_accuracy']}")
print(f"F1 Score: {eval_results['eval_f1']}")
print(f"Precision: {eval_results['eval_precision']}")
print(f"Recall: {eval_results['eval_recall']}")
print(f"Acurácia Harmônica: {eval_results['eval_harmonic_mean']}")

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Harmonic Mean
1,No log,0.352097,0.902527,0.899578,0.89952,0.902527,0
2,0.227400,0.328512,0.898917,0.896377,0.896161,0.898917,0
3,0.227400,0.348552,0.905235,0.904683,0.904757,0.905235,0


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


Accuracy: 0.8989169675090253
F1 Score: 0.8963773109766019
Precision: 0.8961609694732737
Recall: 0.8989169675090253
Acurácia Harmônica: 0


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
