In [1]:
import pickle
from pathlib import Path
import joblib

# Load the train/test splits from the pickle expected at
# ../artifacts/data_splits.pkl. Per the error message below, that file is
# produced by notebook 01.
p = Path('../artifacts/data_splits.pkl')
if not p.exists():
    raise FileNotFoundError(f"No se encontró {p.resolve()}. Ejecuta el notebook 01 para generar el archivo o verifica la ruta.")

# SECURITY: pickle.load can execute arbitrary code while deserialising.
# Only load this file when it was generated by a trusted local run.
with open(p, 'rb') as f:
    splits = pickle.load(f)

# Validate the payload up front so a stale or incomplete pickle fails with a
# message naming every missing key, instead of a bare KeyError on first access.
required_keys = ('X_train', 'X_test', 'y_train', 'y_test')
missing_keys = [key for key in required_keys if key not in splits]
if missing_keys:
    raise KeyError(f"Faltan claves en {p.name}: {missing_keys}")

# Unpack after the file handle is closed; nothing below needs the open file.
X_train = splits['X_train']
X_test = splits['X_test']
y_train = splits['y_train']
y_test = splits['y_test']

print('Cargado data_splits.pkl con éxito')
print(f"X_train.shape: {X_train.shape}")
print(f"X_test.shape: {X_test.shape}")

Cargado data_splits.pkl con éxito
X_train.shape: (72721, 5000)
X_test.shape: (18181, 5000)


In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import joblib
from pathlib import Path

# Baseline model: fit a logistic regression on the training split.
clf = LogisticRegression(
    solver='liblinear',
    max_iter=200,
    random_state=42,
)
clf.fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = clf.predict(X_test)

# Basic evaluation: headline metrics first, then the detailed breakdowns.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
f1 = f1_score(y_test, y_pred)
print(f"F1-score: {f1:.4f}")

print("\nMatriz de confusión:")
conf_matrix = confusion_matrix(y_test, y_pred)
print(conf_matrix)

print("\nReporte de clasificación:")
report = classification_report(y_test, y_pred)
print(report)

# Persist the fitted classifier under ../artifacts, creating the directory
# if it does not exist yet.
artifacts_dir = Path('../artifacts')
artifacts_dir.mkdir(parents=True, exist_ok=True)

model_path = artifacts_dir.joinpath('model.pkl')
joblib.dump(clf, model_path)
print(f"Modelo guardado en: {model_path.resolve()}")


Accuracy: 0.9122
F1-score: 0.9078

Matriz de confusión:
[[8725  366]
 [1231 7859]]

Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92      9091
           1       0.96      0.86      0.91      9090

    accuracy                           0.91     18181
   macro avg       0.92      0.91      0.91     18181
weighted avg       0.92      0.91      0.91     18181

Modelo guardado en: C:\Users\ddani\Desktop\Aplicación de Machine Learning con Interfaz Web\Toxic_comment_classifier\artifacts\model.pkl
