# Evaluación del Modelo

Este notebook evalúa el rendimiento del modelo entrenado (Keras/TensorFlow); los datos de validación se cargan con PySpark.

In [None]:
import json
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from pyspark.sql import SparkSession
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc

# Input/output locations; evaluation artifacts are written under the model directory.
MODEL_DIR = 'data/models'
PROCESSED_DIR = 'data/processed'
EVAL_DIR = os.path.join(MODEL_DIR, 'evaluation')
os.makedirs(EVAL_DIR, exist_ok=True)

# Start (or reuse) the Spark session used to read the processed dataset.
spark = (
    SparkSession.builder
    .appName("ModelEvaluation")
    .config("spark.driver.memory", "4g")
    .config("spark.executor.memory", "4g")
    .getOrCreate()
)

In [None]:
# Load the trained Keras model from disk.
print("Cargando modelo...")
model = tf.keras.models.load_model(os.path.join(MODEL_DIR, "final_model.h5"))

# Load the pickled training history.
# NOTE(review): unpickling can execute arbitrary code; only open files that were
# produced by this project's own training run.
print("Cargando historial de entrenamiento...")
with open(os.path.join(MODEL_DIR, "training_history.pkl"), "rb") as f:
    history = pickle.load(f)

# Read the evaluation data with Spark and collect it onto the driver as pandas.
# NOTE(review): this reads the whole "processed_pages" dataset — confirm it is a
# held-out split and not the data the model was trained on.
print("Cargando datos de validación...")
df_val = spark.read.parquet(os.path.join(PROCESSED_DIR, "processed_pages.parquet"))
val_data = df_val.toPandas()

# Build the feature array (one row per sample) and the label vector.
X_val = np.array([np.array(x) for x in val_data["features"]])
y_val = val_data["label"].values

# Run inference.
print("Realizando predicciones...")
y_pred = model.predict(X_val)
# Presumably the model ends in a single sigmoid unit, in which case predict()
# returns shape (n_samples, 1). Drop that trailing axis so predictions line up
# element-wise with the 1-D label vector; left as a column, comparisons such as
# `y_val == y_pred_classes` broadcast to an (n, n) matrix and corrupt the
# accuracy and error-index computations further down.
if y_pred.ndim == 2 and y_pred.shape[1] == 1:
    y_pred = y_pred[:, 0]
y_pred_classes = (y_pred > 0.5).astype(int)

In [None]:
def plot_training_history(history):
    """Plot training vs. validation accuracy and loss side by side.

    Args:
        history: Mapping indexed by 'accuracy', 'val_accuracy', 'loss' and
            'val_loss'; each entry is passed straight to ``plt.plot``.

    The figure is saved as ``training_history.png`` in ``EVAL_DIR`` and then
    displayed.
    """
    # One entry per panel:
    # (train key, train label, validation key, validation label, title, y-axis label)
    panels = (
        ('accuracy', 'Training Accuracy',
         'val_accuracy', 'Validation Accuracy',
         'Model Accuracy', 'Accuracy'),
        ('loss', 'Training Loss',
         'val_loss', 'Validation Loss',
         'Model Loss', 'Loss'),
    )

    plt.figure(figsize=(15, 5))
    for position, panel in enumerate(panels, start=1):
        train_key, train_label, val_key, val_label, title, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(history[train_key], label=train_label)
        plt.plot(history[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    output_path = os.path.join(EVAL_DIR, 'training_history.png')
    plt.savefig(output_path)
    plt.show()

# Render and save the training curves from the loaded history.
print("Visualizando historial de entrenamiento...")
plot_training_history(history=history)

In [None]:
def plot_confusion_matrix(y_true, y_pred):
    """Draw the confusion matrix as an annotated heatmap, save it and show it.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted class labels.

    The figure is saved as ``confusion_matrix.png`` in ``EVAL_DIR``.
    """
    class_names = ['No Primera', 'Primera']
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(10, 8))
    sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title('Matriz de Confusión')
    plt.ylabel('Etiqueta Verdadera')
    plt.xlabel('Etiqueta Predicha')

    output_path = os.path.join(EVAL_DIR, 'confusion_matrix.png')
    plt.savefig(output_path)
    plt.show()

# Plot the confusion matrix for the thresholded predictions.
print("\nGenerando matriz de confusión...")
plot_confusion_matrix(y_true=y_val, y_pred=y_pred_classes)

# Print the text classification report for the same predictions.
print("\nReporte de Clasificación:")
print(classification_report(y_true=y_val, y_pred=y_pred_classes))

In [None]:
def plot_roc_curve(y_true, y_pred_prob):
    """Plot the ROC curve, save it, show it, and return the area under it.

    Args:
        y_true: Ground-truth labels.
        y_pred_prob: Predicted scores passed to ``roc_curve``.

    Returns:
        The AUC computed from the false/true positive rates.

    The figure is saved as ``roc_curve.png`` in ``EVAL_DIR``.
    """
    false_pos_rate, true_pos_rate, _thresholds = roc_curve(y_true, y_pred_prob)
    roc_auc = auc(false_pos_rate, true_pos_rate)

    plt.figure(figsize=(8, 8))
    plt.plot(
        false_pos_rate,
        true_pos_rate,
        color='darkorange',
        lw=2,
        label=f'ROC curve (AUC = {roc_auc:.2f})',
    )
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Tasa de Falsos Positivos')
    plt.ylabel('Tasa de Verdaderos Positivos')
    plt.title('Curva ROC')
    plt.legend(loc="lower right")

    output_path = os.path.join(EVAL_DIR, 'roc_curve.png')
    plt.savefig(output_path)
    plt.show()
    return roc_auc

# Plot the ROC curve from the raw prediction scores and report its AUC.
print("\nGenerando curva ROC...")
roc_auc = plot_roc_curve(y_true=y_val, y_pred_prob=y_pred)
print(f"AUC-ROC: {roc_auc:.4f}")

In [None]:
def analyze_errors(X_val, y_val, y_pred, y_pred_classes, val_data, num_examples=5,
                   output_dir=None):
    """Summarize the misclassified samples and save them to a CSV file.

    Args:
        X_val: Feature array. Unused; kept so existing callers keep working.
        y_val: Ground-truth labels, one per sample.
        y_pred: Predicted scores, one per sample. May be shaped (n,) or (n, 1).
        y_pred_classes: Predicted class labels, same layout as ``y_pred``.
        val_data: pandas DataFrame with 'pdf_name' and 'page_number' columns,
            row-aligned with ``y_val``.
        num_examples: Number of misclassified rows to print.
        output_dir: Directory for ``error_analysis.csv``. Defaults to the
            module-level ``EVAL_DIR`` when omitted.

    Returns:
        DataFrame with one row per misclassified sample and the columns
        True_Label, Predicted_Label, Confidence, PDF_Name and Page_Number.

    Raises:
        ValueError: If labels and predictions do not contain the same number
            of elements.
    """
    # Flatten to 1-D first. A (n, 1) prediction column compared against (n,)
    # labels would broadcast to an (n, n) matrix and yield bogus error indices.
    y_true = np.asarray(y_val).reshape(-1)
    predicted = np.asarray(y_pred_classes).reshape(-1)
    confidences = np.asarray(y_pred).reshape(-1)
    if not (len(y_true) == len(predicted) == len(confidences)):
        raise ValueError(
            "y_val, y_pred and y_pred_classes must have the same number of "
            f"elements, got {len(y_true)}, {len(confidences)} and {len(predicted)}"
        )

    # Positions of the misclassified samples.
    errors = np.flatnonzero(y_true != predicted)

    # The two Series keep val_data's index, so each row of the result stays
    # traceable to its position in the validation set.
    error_info = pd.DataFrame({
        'True_Label': y_true[errors],
        'Predicted_Label': predicted[errors],
        'Confidence': confidences[errors],
        'PDF_Name': val_data.iloc[errors]['pdf_name'],
        'Page_Number': val_data.iloc[errors]['page_number']
    })

    total = len(y_true)
    # Avoid ZeroDivisionError on an empty validation set.
    error_rate = len(errors) / total * 100 if total else 0.0

    print("\nAnálisis de Errores:")
    print(f"Total de errores: {len(errors)}")
    print(f"Tasa de error: {error_rate:.2f}%")

    # Show a few of the misclassified samples.
    print("\nEjemplos de predicciones incorrectas:")
    print(error_info.head(num_examples))

    # Persist the full error table.
    target_dir = EVAL_DIR if output_dir is None else output_dir
    error_info.to_csv(os.path.join(target_dir, 'error_analysis.csv'), index=False)
    return error_info

# Build and save the table of misclassified validation samples.
print("\nRealizando análisis de errores...")
error_analysis = analyze_errors(
    X_val=X_val,
    y_val=y_val,
    y_pred=y_pred,
    y_pred_classes=y_pred_classes,
    val_data=val_data,
)

In [None]:
# Build the final evaluation report.
def _label_counts(values):
    """Return {label: count} for *values* using plain Python scalars.

    np.unique yields NumPy integer keys and counts, which json.dump rejects;
    ``tolist()`` converts both to native Python types.
    """
    labels, counts = np.unique(np.asarray(values).reshape(-1), return_counts=True)
    return dict(zip(labels.tolist(), counts.tolist()))


# Flatten before comparing: (n,) labels against (n, 1) predictions would
# broadcast to an (n, n) matrix and produce a meaningless accuracy.
_labels_flat = np.asarray(y_val).reshape(-1)
_classes_flat = np.asarray(y_pred_classes).reshape(-1)

report = {
    'model_performance': {
        'accuracy': float((_labels_flat == _classes_flat).mean()),
        'auc_roc': float(roc_auc),
        'total_samples': len(y_val),
        'error_rate': len(error_analysis) / len(y_val)
    },
    'class_distribution': {
        'actual': _label_counts(y_val),
        'predicted': _label_counts(y_pred_classes)
    }
}

# Save the report as JSON (integer label keys are written as strings).
with open(os.path.join(EVAL_DIR, 'evaluation_report.json'), 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=4)

print("\nReporte Final:")
for metric, value in report['model_performance'].items():
    # Counts are integers; print them as-is rather than with four decimals.
    if isinstance(value, int):
        print(f"{metric}: {value}")
    else:
        print(f"{metric}: {value:.4f}")

# Shut down the Spark session.
spark.stop()