# Naïve Bayes desde cero
Este notebook implementa un clasificador de sentimientos usando Naïve Bayes sin bibliotecas especializadas como sklearn. Ideal para proyectos académicos con restricciones de implementación desde cero.


In [4]:
# Paso 1: Cargar el dataset limpio y vectorizado
import pandas as pd

# Reemplaza con el path correcto si es necesario
#df = pd.read_csv('04tweets_vectorizados_bow.csv')
df = pd.read_csv('04tweets_vectorizados_bow.csv')
df.head()
print("Hola mundo desde la celda 1")


Hola mundo desde la celda 1


In [6]:
# Paso 2: Separar datos en X (features) e y (etiquetas)
# Paso 2: Separar datos en X (features) e y (etiquetas)
X = df.drop('tweet', axis=1)
y = df['tweet'].astype(str)  # 🔧 convertimos a str para evitar mezcla de tipos
df.head(10)


Unnamed: 0,aaaaaaaahhhhhhhh,aaaaaah,aaaaaw,aaaawww,aaargh,aaarrrgh,aaaw,aaawww,aah,aasman,...,zit,zls,znl,zo,zombie,zorz,zu,zulu,zune,zwitschert
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Paso 3: Implementación desde cero del clasificador Naïve Bayes
import numpy as np

class NaiveBayes:
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.class_probs = {}
        self.word_probs = {}
        
        for c in self.classes:
            X_c = X[y == c]
            self.class_probs[c] = len(X_c) / len(X)
            total_wc = X_c.sum().sum()
            self.word_probs[c] = (X_c.sum() + 1) / (total_wc + X.shape[1])
            
    def predict(self, X):
        predictions = []
        for i in range(X.shape[0]):
            posteriors = {}
            row = X.iloc[i].values  # asegurar que sea un array
            for c in self.classes:
                log_prob = np.log(self.class_probs[c]) + np.sum(row * np.log(self.word_probs[c].values))
                posteriors[c] = log_prob
            predictions.append(max(posteriors, key=posteriors.get))
        return np.array(predictions)

In [10]:
# Paso 4: Entrenar y evaluar
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Asegurarse que X_train y X_test sean DataFrames para evitar errores con .iloc y .sum
X_train = pd.DataFrame(X_train, columns=X.columns)
X_test = pd.DataFrame(X_test, columns=X.columns)

model = NaiveBayes()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluación básica desde cero
accuracy = (y_pred == y_test.values).mean()
print(f'Accuracy: {accuracy:.4f}')

Accuracy: 0.3480


In [12]:
import numpy as np
import pandas as pd

# Paso 1: Obtener clases
clases = np.unique(y_test)

# Paso 2: Inicializar matriz de confusión
conf_matrix = pd.DataFrame(0, index=clases, columns=clases)

# Paso 3: Llenar matriz de confusión
for true, pred in zip(y_test, y_pred):
    conf_matrix.loc[true, pred] += 1

print("📊 Matriz de Confusión:")
print(conf_matrix)

# Paso 4: Calcular precision, recall y F1 por clase
metricas = []

for clase in clases:
    TP = conf_matrix.loc[clase, clase]
    FP = conf_matrix[clase].sum() - TP
    FN = conf_matrix.loc[clase].sum() - TP
    
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
    
    metricas.append({
        "Clase": clase,
        "Precision": precision,
        "Recall": recall,
        "F1-Score": f1
    })

# Paso 5: Mostrar tabla de métricas
df_metricas = pd.DataFrame(metricas)
print("\n📈 Métricas por clase:")
print(df_metricas)

# Paso 6: Promedios macro
macro_avg = df_metricas[["Precision", "Recall", "F1-Score"]].mean()
print("\n📐 Promedio Macro:")
print(macro_avg)


📊 Matriz de Confusión:
          negative  neutral  positive
negative        36      190        77
neutral         50      224       122
positive        44      169        88

📈 Métricas por clase:
      Clase  Precision    Recall  F1-Score
0  negative   0.276923  0.118812  0.166282
1   neutral   0.384220  0.565657  0.457610
2  positive   0.306620  0.292359  0.299320

📐 Promedio Macro:
Precision    0.322588
Recall       0.325609
F1-Score     0.307737
dtype: float64


In [15]:
# Backend Flask para el modelo Naive Bayes entrenado
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.get_json()
    texto = data.get('text', '')
    if not texto:
        return jsonify({'error': 'No se proporcionó texto'}), 400

    vector = vectorizer.transform([texto])
    pred = model.predict(pd.DataFrame(vector.toarray(), columns=X.columns))
    return jsonify({'sentimiento': pred[0]})

if __name__ == '__main__':
    app.run(debug=True)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (fsevents)
0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Traceback (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 1074, in launch_instance
    app.initialize(argv)
  File "/opt/anaconda3/lib/python3.12/site-packages/traitlets/config/application.py", line 118, in inner
    return method(app, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 654,

SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
