# Entrenamiento de Modelos
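Entrenamiento de dos clasificadores, Naive Bayes multinomial y SVM lineal (LinearSVC), para clasificar noticias como falsas (Fake) o verdaderas (True). Cada modelo se evalúa sobre el conjunto de prueba y se guarda en `../models`.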

In [1]:
# 04_modelos.ipynb

import os
import joblib
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report, confusion_matrix

# — Paths —
# Both directories are resolved relative to the notebook's working directory:
# the vectorized splits are read from ../data/split and the fitted models are
# written to ../models (created here if it does not exist yet).
model_dir = os.path.join(os.pardir, "models")
split_dir = os.path.join(os.pardir, "data", "split")
os.makedirs(model_dir, exist_ok=True)

train_path, test_path = (
    os.path.join(split_dir, file_name) for file_name in ("train.pkl", "test.pkl")
)

# — 1. Load the vectorized data —
# Each pickle holds a (features, labels) pair.
# NOTE(review): joblib.load unpickles arbitrary objects; only load split files
# produced by this project's own pipeline.
X_train, y_train = joblib.load(train_path)
X_test, y_test = joblib.load(test_path)

# Quick sanity check: row counts of features and labels should agree per split.
print("Tamaño train:", *(part.shape for part in (X_train, y_train)))
print("Tamaño test: ", *(part.shape for part in (X_test, y_test)))

# — 2. Train and evaluate Naive Bayes —
# fit() returns the estimator itself, so construction and training can be chained.
nb = MultinomialNB().fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)

# Header and per-class report go out in a single call, one per line.
# NOTE(review): target_names assumes the sorted labels map to Fake first and
# True second — confirm against the notebook that builds the splits.
print(
    "\n📊 Evaluación — Naive Bayes",
    classification_report(y_test, y_pred_nb, target_names=["Fake", "True"]),
    sep="\n",
)
print("Matriz de confusión:\n", confusion_matrix(y_test, y_pred_nb))

# Persist the fitted NB model next to the other model artifacts.
nb_path = os.path.join(model_dir, "nb_model.pkl")
joblib.dump(nb, nb_path)
print(f"✅ Naive Bayes guardado en {nb_path}")

# — 3. Train and evaluate SVM —
# Pin the seed so that "Restart Kernel → Run All" always fits the same model and
# reports the same metrics. LinearSVC only consumes random numbers when it
# solves the dual problem; when the primal is solved the seed has no effect,
# so setting it is harmless in either case.
SVM_RANDOM_STATE = 42
svm = LinearSVC(random_state=SVM_RANDOM_STATE)
svm.fit(X_train, y_train)

# Score the held-out test split.
y_pred_svm = svm.predict(X_test)
print("\n📊 Evaluación — SVM (LinearSVC)")
# NOTE(review): target_names assumes the sorted labels map to Fake first and
# True second — confirm against the notebook that builds the splits.
print(classification_report(y_test, y_pred_svm, target_names=["Fake", "True"]))
print("Matriz de confusión:\n", confusion_matrix(y_test, y_pred_svm))

# Persist the fitted SVM model next to the other model artifacts.
svm_path = os.path.join(model_dir, "svm_model.pkl")
joblib.dump(svm, svm_path)
print(f"✅ SVM guardado en {svm_path}")

print("\n✅ Entrenamiento y guardado de ambos modelos completado.")


Tamaño train: (45784, 5000) (45784,)
Tamaño test:  (11447, 5000) (11447,)

📊 Evaluación — Naive Bayes
              precision    recall  f1-score   support

        Fake       0.96      0.72      0.82      4776
        True       0.83      0.98      0.90      6671

    accuracy                           0.87     11447
   macro avg       0.89      0.85      0.86     11447
weighted avg       0.88      0.87      0.87     11447

Matriz de confusión:
 [[3434 1342]
 [ 151 6520]]
✅ Naive Bayes guardado en ..\models\nb_model.pkl

📊 Evaluación — SVM (LinearSVC)
              precision    recall  f1-score   support

        Fake       0.93      0.87      0.90      4776
        True       0.91      0.95      0.93      6671

    accuracy                           0.92     11447
   macro avg       0.92      0.91      0.91     11447
weighted avg       0.92      0.92      0.92     11447

Matriz de confusión:
 [[4136  640]
 [ 312 6359]]
✅ SVM guardado en ..\models\svm_model.pkl

✅ Entrenamiento y guar