In [None]:
!pip install transformers emoji accelerate datasets huggingface_hub ipywidgets scikit-learn

In [5]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from transformers import pipeline, AutoTokenizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [7]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:1


# 1. Carga de datos

In [8]:
df = pd.read_csv('amazonbaby5000.csv')
df

Unnamed: 0,review,rating
0,I think it is more Expensive than drugstore th...,0
1,"When I saw this on Amazon, I put it into my wi...",1
2,We really like these valances. They have such...,1
3,No light emits from the night light. They pain...,0
4,I was really hoping for this to be a conventie...,0
...,...,...
4995,I like that this carrier is like the Moby in h...,1
4996,The box was damaged upon arrival. I was afraid...,1
4997,Purchased for graduation. Rec'd in 2 days like...,1
4998,For all of the reviews that said this car seat...,0


# 2. Partición de datos

In [4]:
X_train, X_test, y_train, y_test = train_test_split(df['review'].copy(), df['rating'].copy(), test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [5]:
X_train

3444    Our little guy couldn't sleep unless he was se...
2063    works fine easy to use . The sound is fine as ...
3714    I took my 5 month old daughter to a toy store ...
2671    i loved this product untill it broke which was...
2154    My son loves his pajamas with feet, and I didn...
                              ...                        
3200    All I know is my Grandson sleep like a Baby, n...
617     My son has CP and this is one of his favorite ...
1992    This is an older version of the Jessica bag.  ...
3301    I did not like it  jejeje  is a hole down and ...
3687    These bottles work much better than regular fe...
Name: review, Length: 3200, dtype: object

# 3.a Transformer (modelo predeterminado)

In [6]:
# Create a sentiment analysis pipeline with the default model
sentiment_pipeline = pipeline("sentiment-analysis", device=1)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [7]:
def get_sentiment(pipeline, text, max_length=512):
    if len(text) > max_length:
      text = text[:max_length]
    return pipeline(text)

get_sentiment(sentiment_pipeline, X_test.tolist()[0])

[{'label': 'POSITIVE', 'score': 0.9997207522392273}]

In [8]:
# Make predictions on the test set
predictions_default_model = [get_sentiment(sentiment_pipeline, review) for review in X_test]

# Convert the predictions to 1 or 0 sentiments
predictions_default_model = [1 if prediction[0]['label'] == 'POSITIVE' else 0 for prediction in predictions_default_model]

# Metrics
test_accuracy = accuracy_score(y_test, predictions_default_model)
test_precision = precision_score(y_test, predictions_default_model, average='weighted')
test_recall = recall_score(y_test, predictions_default_model, average='weighted')
test_f1_score = f1_score(y_test, predictions_default_model, average='weighted')

print(f'Accuracy of default model: {test_accuracy}')
print(f'Precision of default model: {test_precision}')
print(f'Recall of default model: {test_recall}')
print(f'F1 Score of default model: {test_f1_score}')





Accuracy of default model: 0.799
Precision of default model: 0.845389654021389
Recall of default model: 0.799
F1 Score of default model: 0.8105414968381266


# 3.b Transformer (fine-tunning)

In [9]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
from transformers import DataCollatorWithPadding, AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2).to(device)
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_projector.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.weight', 'classifier.bias', 'pre_classifier.we

In [12]:

def compute_metrics(eval_pred):
   load_accuracy = load_metric("accuracy")
   load_f1 = load_metric("f1")

   logits, labels = eval_pred
   predictions = np.argmax(logits, axis=-1)
   accuracy = load_accuracy.compute(predictions=predictions, references=labels)["accuracy"]
   f1 = load_f1.compute(predictions=predictions, references=labels)["f1"]
   return {"accuracy": accuracy, "f1": f1}

In [15]:
import torch
from torch.utils.data import Dataset

class AmazonReviewDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Tokenización
train_encodings = tokenizer(X_train.tolist(), truncation=True, padding=True)
val_encodings = tokenizer(X_val.tolist(), truncation=True, padding=True)

# Conversión a conjuntos de datos de HuggingFace
train_dataset = AmazonReviewDataset(train_encodings, y_train.tolist())
val_dataset = AmazonReviewDataset(val_encodings, y_val.tolist())



In [18]:
def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)
    accuracy = accuracy_score(y_true=labels, y_pred=pred)
    recall = recall_score(y_true=labels, y_pred=pred)
    precision = precision_score(y_true=labels, y_pred=pred)
    f1 = f1_score(y_true=labels, y_pred=pred)

    return {"accuracy": accuracy, "recall": recall, "precision": precision, "f1": f1}

In [23]:
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

repo_name = "finetuning-sentiment-model-3000-samples"

training_args = TrainingArguments(
   output_dir="./results",
   learning_rate=2e-5,
   per_device_train_batch_size=16,
   per_device_eval_batch_size=16,
   num_train_epochs=10,
   weight_decay=0.01,
   save_strategy="steps",
   evaluation_strategy="steps",
   eval_steps=500,
   load_best_model_at_end=True,
  #  push_to_hub=True,
)

trainer = Trainer(
   model=model,
   args=training_args,
   train_dataset=train_dataset,
   eval_dataset=val_dataset,
   tokenizer=tokenizer,
   data_collator=data_collator,
   compute_metrics=compute_metrics,
   callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

In [24]:
trainer.train()



  0%|          | 0/1000 [00:00<?, ?it/s]



{'loss': 0.049, 'learning_rate': 1e-05, 'epoch': 5.0}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.47087767720222473, 'eval_accuracy': 0.91, 'eval_recall': 0.932475884244373, 'eval_precision': 0.9508196721311475, 'eval_f1': 0.9415584415584415, 'eval_runtime': 8.8841, 'eval_samples_per_second': 90.049, 'eval_steps_per_second': 2.814, 'epoch': 5.0}




{'loss': 0.0092, 'learning_rate': 0.0, 'epoch': 10.0}


  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.48928511142730713, 'eval_accuracy': 0.915, 'eval_recall': 0.9437299035369775, 'eval_precision': 0.9467741935483871, 'eval_f1': 0.9452495974235104, 'eval_runtime': 12.5817, 'eval_samples_per_second': 63.584, 'eval_steps_per_second': 1.987, 'epoch': 10.0}
{'train_runtime': 795.5413, 'train_samples_per_second': 40.224, 'train_steps_per_second': 1.257, 'train_loss': 0.029089210987091066, 'epoch': 10.0}


TrainOutput(global_step=1000, training_loss=0.029089210987091066, metrics={'train_runtime': 795.5413, 'train_samples_per_second': 40.224, 'train_steps_per_second': 1.257, 'train_loss': 0.029089210987091066, 'epoch': 10.0})

In [21]:
trainer.evaluate()

  0%|          | 0/25 [00:00<?, ?it/s]

{'eval_loss': 0.24193769693374634,
 'eval_accuracy': 0.9225,
 'eval_recall': 0.9581993569131833,
 'eval_precision': 0.9430379746835443,
 'eval_f1': 0.9505582137161085,
 'eval_runtime': 3.3033,
 'eval_samples_per_second': 242.185,
 'eval_steps_per_second': 7.568,
 'epoch': 2.0}

# 3.c Comparación de resultados

Resultados del modelo predeterminado:
* Accuracy of default model: 0.799
* Precision of default model: 0.845389654021389
* Recall of default model: 0.799
* F1 Score of default model: 0.8105414968381266

Resultados del modelo (fine-tuned)
* Accuracy of fine-tuned model: 0.9225
* Precision of fine-tuned model: 0.9430379746835443
* Recall of fine-tuned model: 0.958199356913183
* F1 Score of fine-tuned model: 0.9505582137161085


El modelo adaptado (fine-tuned) obtuvo resultados considerablemente mejores al modelo predeterminado. Por lo que podría argumentarse que es conveniente ajustar los modelos pre-entrenados al conjunto de datos para el cual será utilizado, con el objetivo de obtener mejores resultados.

# 4. Modelo pre-entrenado de traducción

In [9]:
translation_pipeline = pipeline("translation_en_to_es", model="Helsinki-NLP/opus-mt-en-es", device=1)




In [11]:
from tqdm import tqdm

def translate_text(text, translation_pipeline, max_length=512):
    """
    Traduce el texto dado utilizando la pipeline de traducción proporcionada. 
    Divide el texto en segmentos si es más largo que max_length.
    """
    # Divide el texto en segmentos de longitud max_length
    text_segments = [text[i : i + max_length] for i in range(0, len(text), max_length)]
    
    translated_text = ""
    
    for segment in text_segments:
        translation = translation_pipeline(segment)
        translated_text += translation[0]['translation_text']
        
    return translated_text

# Translate the batch of reviews and add them to the list
translated_reviews = [translate_text(review, translation_pipeline) for review in tqdm(df['review'].tolist())]


100%|██████████| 5000/5000 [1:37:24<00:00,  1.17s/it]  


In [14]:
# Create a new DataFrame with the translated reviews
translated_df = pd.DataFrame({'translated_review': translated_reviews})

# Save the translated reviews to a CSV file
translated_df.to_csv('translated_reviews.csv', index=False)

In [13]:
translated_df.head()

Unnamed: 0,translated_review
0,Creo que es más caro que la farmacia que sólo ...
1,"Cuando vi esto en Amazon, lo puse en mi lista ..."
2,Realmente nos gustan estos valances. Tienen pe...
3,No hay luz que emita de la luz nocturna. Lo pi...
4,Realmente esperaba que esto fuera un dispensad...


In [15]:
# Create a new DataFrame with the translated reviews
new_df = pd.DataFrame({'review': translated_df['translated_review'], 'rating': df['rating']})
new_df

Unnamed: 0,review,rating
0,Creo que es más caro que la farmacia que sólo ...,0
1,"Cuando vi esto en Amazon, lo puse en mi lista ...",1
2,Realmente nos gustan estos valances. Tienen pe...,1
3,No hay luz que emita de la luz nocturna. Lo pi...,0
4,Realmente esperaba que esto fuera un dispensad...,0
...,...,...
4995,Me gusta que este portador es como el Moby en ...,1
4996,La caja estaba dañada a la llegada. Tenía mied...,1
4997,Comprado para la graduación. Rec'd en 2 días c...,1
4998,Para todos los comentarios que dijeron que est...,0


In [16]:
new_df.to_csv("translated_reviews2.csv", index=False)

En general las traducciones parecieran conservar gran parte del sentido original de la oración original, aunque podrían estarse perdiendo comentarios con regionalismos o un contextos que al traducirse literalmente podría estarse perdiendo el significado original.

# 5. Modelo "Pysentimiento" para análisis de sentimiento en español

In [7]:
new_df = pd.read_csv("translated_reviews2.csv")
new_df

Unnamed: 0,review,rating
0,Creo que es más caro que la farmacia que sólo ...,0
1,"Cuando vi esto en Amazon, lo puse en mi lista ...",1
2,Realmente nos gustan estos valances. Tienen pe...,1
3,No hay luz que emita de la luz nocturna. Lo pi...,0
4,Realmente esperaba que esto fuera un dispensad...,0
...,...,...
4995,Me gusta que este portador es como el Moby en ...,1
4996,La caja estaba dañada a la llegada. Tenía mied...,1
4997,Comprado para la graduación. Rec'd en 2 días c...,1
4998,Para todos los comentarios que dijeron que est...,0


In [8]:
X_train, X_test, y_train, y_test = train_test_split(new_df['review'].copy(), new_df['rating'].copy(), test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
!pip install pysentimiento

In [11]:
from pysentimiento import create_analyzer
analyzer = create_analyzer(task="sentiment", lang="es")

In [21]:
def get_sentiment(pipeline, text, max_length=512):
    if len(text) > max_length:
      text = text[:max_length]
    return pipeline.predict(text)

prediction = get_sentiment(analyzer, X_test.tolist()[0])
prediction.output

'POS'

In [22]:
# Make predictions on the test set
predictions_default_model = [get_sentiment(analyzer, review) for review in X_test]

# Convert the predictions to 1 or 0 sentiments
predictions_default_model = [1 if prediction.output == 'POS' else 0 for prediction in predictions_default_model]

# Metrics
test_accuracy = accuracy_score(y_test, predictions_default_model)
test_precision = precision_score(y_test, predictions_default_model, average='weighted')
test_recall = recall_score(y_test, predictions_default_model, average='weighted')
test_f1_score = f1_score(y_test, predictions_default_model, average='weighted')

print(f'Accuracy of default model: {test_accuracy}')
print(f'Precision of default model: {test_precision}')
print(f'Recall of default model: {test_recall}')
print(f'F1 Score of default model: {test_f1_score}')



Accuracy of default model: 0.776
Precision of default model: 0.8464974416904778
Recall of default model: 0.776
F1 Score of default model: 0.7909760164165706


Resultados:

| Metric        | Value                 |
|:-------------:|:---------------------:|
| Accuracy      | 0.776                 |
| Precision     | 0.8464974416904778    |
| Recall        | 0.776                 |
| F1 Score      | 0.7909760164165706    |



Se obtuvieron resultados relativamente peores al modelo por defecto en inglés, y considerablemente peores a los resultados del modelo ajustado en inglés. Esto podría ser debido a que ciertos significados de los datos se pierdan durante la traducción, por lo que podría ser necesario traducirlos y revisarlos de manera manual. Asimismo, podría deberse a que es necesario ajustar el modelo en español a los datos particulares que se están utilizando; o, incluso, a que el modelo en español fué entranado con menos datos, o datos de menor calidad, que los modelos en inglés.