# Comparación de Modelos de Clasificación de Sentimientos

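Este notebook compara modelos tradicionales de ML (Logistic Regression, SVM, Random Forest) con redes neuronales (FNN, CNN, LSTM) para la clasificación de sentimientos, usando la accuracy como métrica de comparación.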

In [None]:
# Imports y configuración
import sys
from pathlib import Path

sys.path.append('..')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from src.preprocessing import preprocess_dataframe
from src.traditional.models import train_and_compare_models, TraditionalClassifier
from src.neural.models import train_and_compare_neural_models, NeuralClassifier

# Display configuration
plt.style.use('seaborn-v0_8-whitegrid')
pd.set_option('display.max_columns', None)

# Later cells write figures and model files under ../models. savefig does not
# create missing directories, so make sure the folder exists up front; this is
# a no-op when it is already there.
Path('../models').mkdir(parents=True, exist_ok=True)

# Seed NumPy's global RNG so anything drawing from it repeats across
# Restart & Run All.
# NOTE(review): the src.* training helpers may set their own seeds, and the
# neural backend has a separate RNG — confirm inside those modules.
np.random.seed(42)

## 1. Carga y Preprocesamiento de Datos

In [None]:
# Load the cafeteria responses CSV and report its size and class balance
df = pd.read_csv('../data/respuestas_cafeteria.csv')

print(f"Total de registros: {len(df)}", "\nDistribución de clases:", sep="\n")
print(df['kind_of_comment'].value_counts())

# Last expression of the cell: rich preview of the first rows
df.head()

In [None]:
# Preprocess the text. Presumably this reads the 'comment' column and writes
# the cleaned text to 'clean_comment' (the next cell reads that column) —
# see src.preprocessing for the actual cleaning steps.
df = preprocess_dataframe(
    df,
    text_column='comment',
    output_column='clean_comment',
)
df.head()

In [None]:
# Model inputs: the cleaned comments as features, the comment kind as target
X, y = df['clean_comment'], df['kind_of_comment']

print(f"Total de muestras: {len(X)}")

## 2. Modelos Tradicionales (Logistic Regression, SVM, Random Forest)

In [None]:
# Train and compare the traditional models. The helper returns a dict of
# per-model results together with a 4-tuple, unpacked below as the
# train/test split.
traditional_results, traditional_splits = train_and_compare_models(X, y)
X_train, X_test, y_train, y_test = traditional_splits

In [None]:
# Confusion matrices for the traditional models, one panel per model.
# The grid is sized from the results dict rather than a fixed 3 columns, so
# adding or removing a model no longer raises IndexError or leaves blank
# panels. max(..., 1) keeps subplots() valid when the dict is empty.
n_models = max(len(traditional_results), 1)
# squeeze=False keeps `axes` 2-D even for a single panel, so ravel() always
# yields one Axes per column. 5 in. per panel matches the original 15x4 layout.
fig, axes = plt.subplots(1, n_models, figsize=(5 * n_models, 4), squeeze=False)

for ax, (model_name, data) in zip(axes.ravel(), traditional_results.items()):
    results = data['results']
    # Use the classes exposed by the underlying model when available,
    # otherwise fall back to the three expected label names.
    # NOTE(review): assumes the stored confusion matrix rows/columns follow
    # this same label order — confirm in src.traditional.models.
    labels = getattr(data['classifier'].model, 'classes_',
                     ['negativo', 'neutro', 'positivo'])

    sns.heatmap(results['confusion_matrix'], annot=True, fmt='d', cmap='Blues',
                ax=ax, xticklabels=labels, yticklabels=labels)
    ax.set_title(f"{model_name.upper()}\nAccuracy: {results['accuracy']:.4f}")
    ax.set_xlabel('Predicción')
    ax.set_ylabel('Real')

fig.tight_layout()
fig.savefig('../models/traditional_confusion_matrices.png', dpi=150)
plt.show()

## 3. Modelos de Redes Neuronales (FNN, CNN, LSTM)

In [None]:
# Train and compare the neural networks on the same X / y as above.
# NOTE(review): `splits` is not used by any visible cell. Confirm this helper
# evaluates on the same test split as the traditional run; otherwise the
# accuracies compared later are not like-for-like.
neural_results, splits = train_and_compare_neural_models(X, y, epochs=30, batch_size=32)

In [None]:
# Learning curves (accuracy per epoch) for the neural models, one panel each.
# The grid is sized from the results dict rather than a fixed 3 columns, so
# adding or removing a model no longer raises IndexError or leaves blank
# panels. max(..., 1) keeps subplots() valid when the dict is empty.
n_models = max(len(neural_results), 1)
# squeeze=False keeps `axes` 2-D even for a single panel, so ravel() always
# yields one Axes per column. 5 in. per panel matches the original 15x4 layout.
fig, axes = plt.subplots(1, n_models, figsize=(5 * n_models, 4), squeeze=False)

for ax, (model_name, data) in zip(axes.ravel(), neural_results.items()):
    history = data['history']

    ax.plot(history['accuracy'], label='Train')
    # The validation curve is optional: only drawn when the history has it.
    if 'val_accuracy' in history:
        ax.plot(history['val_accuracy'], label='Validation')

    ax.set_title(f"{model_name.upper()} - Accuracy: {data['results']['accuracy']:.4f}")
    ax.set_xlabel('Época')
    ax.set_ylabel('Accuracy')
    ax.legend()

fig.tight_layout()
fig.savefig('../models/neural_learning_curves.png', dpi=150)
plt.show()

## 4. Comparación Final

In [None]:
# Build the comparison table: one row per model, tagged with its family,
# traditional models first and neural models after, then sorted by accuracy.
comparison_data = [
    {
        'Modelo': model_name.upper(),
        'Tipo': model_type,
        'Accuracy': data['results']['accuracy'],
    }
    for model_type, results_by_model in (
        ('Tradicional', traditional_results),
        ('Red Neuronal', neural_results),
    )
    for model_name, data in results_by_model.items()
]

comparison_df = pd.DataFrame(comparison_data).sort_values('Accuracy', ascending=False)

banner = "=" * 50
print("\n" + banner)
print("COMPARACIÓN FINAL DE MODELOS")
print(banner)
print(comparison_df.to_string(index=False))

In [None]:
# Comparative bar chart: accuracy per model, colored by model family
from matplotlib.patches import Patch

TRADITIONAL_COLOR, NEURAL_COLOR = '#3498db', '#e74c3c'

fig, ax = plt.subplots(figsize=(10, 6))

# Anything that is not 'Tradicional' gets the neural color
bar_colors = [
    TRADITIONAL_COLOR if model_type == 'Tradicional' else NEURAL_COLOR
    for model_type in comparison_df['Tipo']
]
bars = ax.bar(comparison_df['Modelo'], comparison_df['Accuracy'], color=bar_colors)

# Write each accuracy value just above its bar
for bar, acc in zip(bars, comparison_df['Accuracy']):
    ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
            f'{acc:.3f}', ha='center', va='bottom', fontweight='bold')

ax.set_title('Comparación de Accuracy por Modelo', fontsize=14)
ax.set_xlabel('Modelo')
ax.set_ylabel('Accuracy')
# Headroom above 1.0 so the value labels are not clipped
ax.set_ylim(0, 1.1)

# Legend built from proxy patches because the bars are colored individually
ax.legend(
    handles=[
        Patch(facecolor=TRADITIONAL_COLOR, label='Tradicional'),
        Patch(facecolor=NEURAL_COLOR, label='Red Neuronal'),
    ],
    loc='lower right',
)

fig.tight_layout()
fig.savefig('../models/model_comparison.png', dpi=150)
plt.show()

In [None]:
# Pick the most accurate model of each family and save it to ../models
def _accuracy(item):
    """Return the accuracy stored in a (model_name, data) results item."""
    return item[1]['results']['accuracy']


best_traditional = max(traditional_results.items(), key=_accuracy)
best_neural = max(neural_results.items(), key=_accuracy)

trad_name, trad_data = best_traditional
neural_name, neural_data = best_neural

print(f"Mejor modelo tradicional: {trad_name} ({trad_data['results']['accuracy']:.4f})")
print(f"Mejor modelo neuronal: {neural_name} ({neural_data['results']['accuracy']:.4f})")

# Save through each classifier's own save(); the file name embeds the model name
trad_data['classifier'].save(f'../models/best_traditional_{trad_name}.joblib')
neural_data['classifier'].save(f'../models/best_neural_{neural_name}.h5')

## 5. Conclusiones

Resumen de hallazgos:
- **Mejor modelo tradicional**: [completar tras ejecución]
- **Mejor red neuronal**: [completar tras ejecución]
- **Recomendación**: [completar según resultados]