In [1]:
# notebooks/06_Sentiment_Analysis.ipynb
import os
import logging
import sys
import pandas as pd
import torch

# Make the project root importable so that `config` can be found.
# Notebooks do not define __file__, so the notebook directory is taken to be
# the current working directory of the kernel.
notebook_dir = os.path.abspath(os.getcwd())
project_root = os.path.dirname(notebook_dir)
already_on_path = project_root in sys.path
if not already_on_path:
    # Prepend so project modules win over same-named installed packages.
    sys.path.insert(0, project_root)

from config import XLSX_DIRECTORY
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.nn.functional import softmax

# Load the tokenizer and the sequence-classification model used for sentiment
# analysis. Both are created once at module level and passed by main() to
# analyze_sentiment() for every sentence.
beto_model_name = "finiteautomata/beto-sentiment-analysis"
beto_tokenizer = AutoTokenizer.from_pretrained(beto_model_name)
beto_model = AutoModelForSequenceClassification.from_pretrained(beto_model_name)


# Load the sentence data from the Excel file.
# NOTE(review): `df` is not referenced anywhere else in the visible code, and
# main() reads the same file again — confirm whether this load is still needed.
# NOTE(review): no logging configuration is visible here; if the root logger is
# still at its default WARNING level this INFO message is not shown — confirm.
logging.info("Cargando datos de oraciones desde Excel para análisis de sentimientos...")
df = pd.read_excel(os.path.join(XLSX_DIRECTORY, "sentence.xlsx"))


def analyze_sentiment(sentence, tokenizer, model):
    """Classify one sentence and return the score and label of its top class.

    Args:
        sentence: Text to classify. Any value that is not a ``str`` is
            reported with a warning and skipped.
        tokenizer: Callable invoked as ``tokenizer(sentence,
            return_tensors="pt", max_length=512, truncation=True)``; its
            result is unpacked into ``model`` as keyword arguments.
        model: Callable whose result exposes a ``logits`` tensor; it must
            also provide ``config.id2label`` to map a class index to a label.

    Returns:
        tuple: ``(sentiment_score, sentiment_category)`` where the score is
        the highest softmax probability rounded to 4 decimals and the
        category is the label of that class, or ``(None, None)`` when
        ``sentence`` is not a string.
    """
    if isinstance(sentence, str):
        encoded = tokenizer(
            sentence, return_tensors="pt", max_length=512, truncation=True
        )
        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            raw_scores = model(**encoded).logits
        # squeeze() drops size-1 dimensions so that softmax over dim 0 runs
        # across the class scores (assumes one sentence per call).
        distribution = softmax(raw_scores.squeeze(), dim=0)
        top_probability = distribution.max().item()
        top_class = distribution.argmax().item()
        return round(top_probability, 4), model.config.id2label[top_class]

    logging.warning(f"Se encontró un valor no textual: {sentence}")
    return None, None


def main():
    """Run sentiment analysis over every sentence and save the results to Excel.

    Reads ``sentence.xlsx`` from ``XLSX_DIRECTORY`` (the ``sentence_clean`` and
    ``id_sentence`` columns are used), scores each sentence with
    ``analyze_sentiment`` using the module-level ``beto_tokenizer`` and
    ``beto_model``, and writes ``sentiment.xlsx`` to the same directory with
    the columns ``id_sentiment``, ``id_sentence``, ``sentiment_score`` and
    ``sentiment_category``.

    Rows for which ``analyze_sentiment`` returns ``None`` are left out of the
    output. ``id_sentiment`` is the row's index in the input frame plus one,
    so skipped rows leave gaps in the numbering.
    """
    output_columns = [
        "id_sentiment",
        "id_sentence",
        "sentiment_score",
        "sentiment_category",
    ]

    # Load the sentence data from the Excel file.
    sentence_data = pd.read_excel(os.path.join(XLSX_DIRECTORY, "sentence.xlsx"))

    # Run the sentiment analysis. Results are collected in a plain list and
    # turned into a DataFrame once at the end: enlarging a DataFrame one row
    # at a time re-allocates it on every insert.
    logging.info("Realizando análisis de sentimientos...")
    records = []
    for index, row in sentence_data.iterrows():
        sentiment_score, sentiment_category = analyze_sentiment(
            row["sentence_clean"], beto_tokenizer, beto_model
        )
        if sentiment_score is None or sentiment_category is None:
            continue
        records.append(
            {
                "id_sentiment": index + 1,
                "id_sentence": row["id_sentence"],
                "sentiment_score": sentiment_score,
                "sentiment_category": sentiment_category,
            }
        )

    # Passing `columns` keeps the header present even when no row was scored.
    sentiment_df = pd.DataFrame(records, columns=output_columns)

    # Save the sentiment analysis results to a new Excel file.
    logging.info("Guardando resultados del análisis de sentimientos en Excel...")
    sentiment_df.to_excel(os.path.join(XLSX_DIRECTORY, "sentiment.xlsx"), index=False)


# Run the full pipeline when this code is executed as the main module.
# NOTE(review): presumably this also fires when the cell is run in a notebook
# kernel, where __name__ is "__main__" — confirm.
if __name__ == "__main__":
    main()



In [None]:
# To install the vaderSentiment package into this kernel's environment, run: %pip install vaderSentiment