In [1]:
%pip install -q transformers datasets 
%pip install transformers[torch]

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd 

import torch 

from sklearn.model_selection import train_test_split 

from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments 

from datasets import Dataset 

import numpy as np 

from sklearn.metrics import accuracy_score, precision_recall_fscore_support




In [3]:
df = pd.read_csv(r"C:\Users\arbed\Downloads\dataset_sentimientos_500.csv") 

df.columns = df.columns.str.strip() 

df = df[['Reseña', 'Sentimiento']].dropna() 

df['Sentimiento'] = df['Sentimiento'].map({'Positiva': 1, 'Negativa': 0})

In [4]:
train_texts, test_texts, train_labels, test_labels = train_test_split( 

    df['Reseña'].tolist(), df['Sentimiento'].tolist(), test_size=0.2, random_state=42 

)

In [5]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') 

train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=128) 

test_encodings = tokenizer(test_texts, truncation=True, padding=True, max_length=128)

In [6]:
train_dataset = Dataset.from_dict({ 

    'input_ids': train_encodings['input_ids'], 

    'attention_mask': train_encodings['attention_mask'], 

    'labels': train_labels 

}) 

test_dataset = Dataset.from_dict({ 

    'input_ids': test_encodings['input_ids'], 

    'attention_mask': test_encodings['attention_mask'], 

    'labels': test_labels 

})

In [7]:
def compute_metrics(eval_pred): 

    logits, labels = eval_pred 

    preds = np.argmax(logits, axis=-1) 

    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='binary') 

    acc = accuracy_score(labels, preds) 

    return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}

In [8]:
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
training_args = TrainingArguments( 

    output_dir="/results", 

    num_train_epochs=3, 

    per_device_train_batch_size=8, 

    per_device_eval_batch_size=8, 

    logging_dir="/logs", 

    logging_steps=10, 

    save_steps=50, 

    save_total_limit=1 

)

In [10]:
trainer = Trainer( 

    model=model, 

    args=training_args, 

    train_dataset=train_dataset, 

    eval_dataset=test_dataset, 

    compute_metrics=compute_metrics 

) 

trainer.train() 



Step,Training Loss
10,0.654
20,0.3707
30,0.1029
40,0.0203
50,0.0053
60,0.0027
70,0.0019
80,0.0015
90,0.0012
100,0.0011




TrainOutput(global_step=150, training_loss=0.07774796516944965, metrics={'train_runtime': 112.381, 'train_samples_per_second': 10.678, 'train_steps_per_second': 1.335, 'total_flos': 11716664184000.0, 'train_loss': 0.07774796516944965, 'epoch': 3.0})

In [11]:
results = trainer.evaluate() 

print("Resultados:", results) 



Resultados: {'eval_loss': 0.0007064539240673184, 'eval_accuracy': 1.0, 'eval_f1': 1.0, 'eval_precision': 1.0, 'eval_recall': 1.0, 'eval_runtime': 4.8473, 'eval_samples_per_second': 20.63, 'eval_steps_per_second': 2.682, 'epoch': 3.0}
