In [3]:
!pip install transformers datasets



In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from google.colab import drive

# Montar Google Drive
drive.mount('/content/drive')

# Carregar os dados
dataset = load_dataset('csv', data_files={
                                          'train': '/content/drive/MyDrive/Colab Notebooks/df_stj_train_fsl_5_shot.csv',
                                          'validation': '/content/drive/MyDrive/Colab Notebooks/df_stj_validacao.csv',
                                          'test': '/content/drive/MyDrive/Colab Notebooks/df_stj_teste.csv'
                                          })

# Tokenizador e modelo JurisBERT
tokenizer = AutoTokenizer.from_pretrained("alfaneo/jurisbert-base-portuguese-uncased")
model = AutoModelForSequenceClassification.from_pretrained("alfaneo/jurisbert-base-portuguese-uncased", num_labels=7)

Mounted at /content/drive


Generating train split: 0 examples [00:00, ? examples/s]

Generating validation split: 0 examples [00:00, ? examples/s]

Generating test split: 0 examples [00:00, ? examples/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/376 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/254k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/482k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/829 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at alfaneo/jurisbert-base-portuguese-uncased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Label map para o treinamento (7 classes)
label_map_train = {'DIREITO ADMINISTRATIVO': 0,
                   'DIREITO CIVIL': 1,
                   'DIREITO DA CRIANÇA E DO ADOLESCENTE': 2,
                   'DIREITO DO CONSUMIDOR': 3,
                   'DIREITO EMPRESARIAL': 4,
                   'DIREITO PENAL': 5,
                   'DIREITO AMBIENTAL': 6
}

# Label map para o validação (7 classes)
label_map_validation = {'DIREITO ADMINISTRATIVO': 0,
                        'DIREITO CIVIL': 1,
                        'DIREITO DA CRIANÇA E DO ADOLESCENTE': 2,
                        'DIREITO DO CONSUMIDOR': 3,
                        'DIREITO EMPRESARIAL': 4,
                        'DIREITO PENAL': 5,
                        'DIREITO AMBIENTAL': 6
}

# Label map para o teste (7 classes)
label_map_test = {'DIREITO ADMINISTRATIVO': 0,
                  'DIREITO CIVIL': 1,
                  'DIREITO DA CRIANÇA E DO ADOLESCENTE': 2,
                  'DIREITO DO CONSUMIDOR': 3,
                  'DIREITO EMPRESARIAL': 4,
                  'DIREITO PENAL': 5,
                  'DIREITO AMBIENTAL': 6
}

def preprocess_training_data(examples):
    examples["label"] = [label_map_train[label] for label in examples["MATERIA"]]
    return tokenizer(examples["EMENTA"], padding="max_length", truncation=True, max_length=512)

def preprocess_validation_data(examples):
    examples["label"] = [label_map_validation[label] for label in examples["MATERIA"]]
    return tokenizer(examples["EMENTA"], padding="max_length", truncation=True, max_length=512)

def preprocess_testing_data(examples):
    examples["label"] = [label_map_test[label] for label in examples["MATERIA"]]
    return tokenizer(examples["EMENTA"], padding="max_length", truncation=True, max_length=512)

encoded_dataset = {}

# Pré-processando o conjunto de treinamento
encoded_dataset["train"] = dataset["train"].map(preprocess_training_data, batched=True)

# Pré-processando o conjunto de validação
encoded_dataset["validation"] = dataset["validation"].map(preprocess_validation_data, batched=True)

# Pré-processando o conjunto de teste
encoded_dataset["test"] = dataset["test"].map(preprocess_testing_data, batched=True)

Map:   0%|          | 0/5145 [00:00<?, ? examples/s]

Map:   0%|          | 0/1112 [00:00<?, ? examples/s]

Map:   0%|          | 0/1111 [00:00<?, ? examples/s]

In [6]:
# Argumentos de treinamento
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    learning_rate=2e-5,
    weight_decay=0.01,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
    report_to="none",
)

# Função de métricas
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average="weighted")
    precision = precision_score(labels, preds, average="weighted", zero_division=0)
    recall = recall_score(labels, preds, average="weighted", zero_division=0)

     # Calcular a acurácia harmônica
    acc_seen = accuracy_score(labels[labels != -100], preds[labels != -100])  # Acurácia das classes vistas
    acc_unseen = accuracy_score(labels[labels == -100], preds[labels == -100])  # Acurácia das classes não vistas
    harmonic_mean = 2 * (acc_seen * acc_unseen) / (acc_seen + acc_unseen) if (acc_seen + acc_unseen) > 0 else 0

    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall, "harmonic_mean": harmonic_mean}

In [7]:
# Configuração do treinador
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["validation"],
    compute_metrics=compute_metrics
)

# Treinar o modelo
trainer.train()

# Avaliar o modelo
eval_results = trainer.evaluate()
print(f"Accuracy: {eval_results['eval_accuracy']}")
print(f"F1 Score: {eval_results['eval_f1']}")
print(f"Precision: {eval_results['eval_precision']}")
print(f"Recall: {eval_results['eval_recall']}")
print(f"Acurácia Harmônica: {eval_results['eval_harmonic_mean']}")

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall,Harmonic Mean
1,No log,0.361739,0.879496,0.872275,0.874287,0.879496,0
2,0.454300,0.328938,0.885791,0.879088,0.892411,0.885791,0
3,0.454300,0.303384,0.901079,0.898868,0.897944,0.901079,0


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)


Accuracy: 0.9010791366906474
F1 Score: 0.8988677407577772
Precision: 0.8979435452522316
Recall: 0.9010791366906474
Acurácia Harmônica: 0


  avg = a.mean(axis, **keepdims_kw)
  ret = ret.dtype.type(ret / rcount)
