BIBLIOTECAS

In [None]:
#%pip install -q pandas scikit-learn imbalanced-learn gradio nltk

In [None]:
# Dataset
# https://www.kaggle.com/datasets/sid321axn/amazon-alexa-reviews

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import nltk
import gradio as gr
import re
nltk.download('stopwords')
from nltk.corpus import stopwords

# Load the Alexa reviews dataset (tab-separated file in the working directory).
df = pd.read_csv("amazon_alexa.tsv", sep="\t")

# Sentiment label derived from `feedback`: 1 -> 'positivo', anything else -> 'negativo'.
df['sentiment'] = df['feedback'].eq(1).map({True: 'positivo', False: 'negativo'})

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/daredliuth/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


LIMPIEZA DE TEXTO

In [3]:
# Negation words that must survive stop-word filtering.
# Note: limpiar_texto strips punctuation before tokenizing on whitespace, so a
# token can never equal "n't"; the entry is kept as-is but is effectively inert.
palabras_negativas = {"no", "not", "nor", "n't", "never", "none"}

# English stop words from NLTK, minus the negations listed above.
stop_words = set(stopwords.words('english')).difference(palabras_negativas)

def limpiar_texto(texto, palabras_vacias=None):
    """Normalize a review into a space-joined string of lowercase tokens.

    The text is lowercased, every character that is neither a word character
    nor whitespace is deleted (so "don't" becomes "dont"), and the remaining
    whitespace-separated tokens found in the stop-word collection are dropped.

    Args:
        texto: Raw review. Non-string values are converted with ``str``;
            ``None`` and float NaN are treated as an empty review.
        palabras_vacias: Optional collection of tokens to drop. When omitted,
            the module-level ``stop_words`` set is used.

    Returns:
        The cleaned text, or ``""`` when the input is missing or no token
        survives the filtering.
    """
    # Missing values would otherwise be stringified into the literal tokens
    # "nan" / "none" and be treated as review content. `texto != texto` is
    # only true for NaN.
    if texto is None or (isinstance(texto, float) and texto != texto):
        return ""
    if palabras_vacias is None:
        palabras_vacias = stop_words

    # Remove punctuation (characters are deleted, not replaced by a space).
    sin_puntuacion = re.sub(r"[^\w\s]", "", str(texto).lower())
    return " ".join(
        token for token in sin_puntuacion.split() if token not in palabras_vacias
    )

# Clean every review element-wise and store the result in a new column.
df['clean_review'] = df['verified_reviews'].map(limpiar_texto)

SEPARACIÓN DE DATOS

In [4]:
# Features are the cleaned reviews; the target is the sentiment label.
X, y = df['clean_review'], df['sentiment']

# Hold out 20% of the rows for testing; stratify on the label so both splits
# keep the same class proportions, and fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

VECTORIZACIÓN

In [5]:
# Fit the TF-IDF vectorizer on the training reviews only, so nothing learned
# from the test split influences the features.
vectorizador = TfidfVectorizer()
X_train_vect = vectorizador.fit_transform(X_train)
# Encode the test reviews with the already-fitted vectorizer.
X_test_vect = vectorizador.transform(X_test)

BALANCEO

In [6]:
# Class counts of the training labels before resampling.
print("Antes del balanceo:")
print(y_train.value_counts())

# Oversample the vectorized training split only; the test split is left
# untouched. The seed is fixed so the resampling is reproducible.
smote = SMOTE(random_state=42)
X_train_sm, y_train_sm = smote.fit_resample(X_train_vect, y_train)

# Class counts after resampling.
print("\nDespués de aplicar SMOTE:")
print(pd.Series(y_train_sm).value_counts())

Antes del balanceo:
sentiment
positivo    2314
negativo     206
Name: count, dtype: int64

Después de aplicar SMOTE:
sentiment
positivo    2314
negativo    2314
Name: count, dtype: int64


ENTRENAR MODELO

In [7]:
# Train a multinomial Naive Bayes classifier on the resampled training data.
modelo = MultinomialNB().fit(X_train_sm, y_train_sm)
modelo

EVALUACIÓN

In [8]:
# Predict on the held-out (non-resampled) test split.
y_pred = modelo.predict(X_test_vect)

# Per-class report followed by overall accuracy.
print(
    "\nReporte de clasificación en conjunto de prueba:",
    classification_report(y_test, y_pred),
    sep="\n",
)
print("Precisión general:", accuracy_score(y_test, y_pred))


Reporte de clasificación en conjunto de prueba:
              precision    recall  f1-score   support

    negativo       0.45      0.75      0.56        51
    positivo       0.98      0.92      0.95       579

    accuracy                           0.91       630
   macro avg       0.71      0.83      0.76       630
weighted avg       0.93      0.91      0.92       630

Precisión general: 0.9063492063492063


INTERFAZ CON GRADIO

In [9]:
def predecir_sentimiento(texto_usuario):
    """Classify a free-text review and return a display-ready message.

    The input is cleaned with ``limpiar_texto`` and encoded with the fitted
    ``vectorizador`` before being passed to ``modelo``.

    Args:
        texto_usuario: Review text typed by the user.

    Returns:
        ``"Sentimiento detectado: <LABEL>"`` with the predicted label
        upper-cased, or a notice asking for more text when the input contains
        no term known to the vectorizer.
    """
    texto_limpio = limpiar_texto(texto_usuario)
    texto_vect = vectorizador.transform([texto_limpio])

    # An all-zero row (blank input, only stop words, or only unseen words)
    # carries no evidence: the classifier would answer from its class priors
    # alone, which must not be presented as a detected sentiment.
    if texto_vect.nnz == 0:
        return "No se pudo determinar el sentimiento: escribe una reseña con más detalle."

    prediccion = modelo.predict(texto_vect)[0]
    return f"Sentimiento detectado: {prediccion.upper()}"

# Minimal web UI: a multi-line text box in, the prediction message out.
interfaz = gr.Interface(
    title="Análisis de Sentimientos",
    description="Escribe una reseña y el modelo predecirá si es POSITIVA o NEGATIVA.",
    fn=predecir_sentimiento,
    inputs=gr.Textbox(lines=3, placeholder="Escribe tu reseña de Alexa..."),
    outputs="text",
)

# share=True also requests a temporary public URL in addition to the local one.
interfaz.launch(share=True)

* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://e919d893c5c18bd503.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


