# Librerias

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import os
import json
import joblib

In [2]:
def load_prepared_data():
    """Load the prepared dataset"""
    X = np.load('X_prepared.npy')
    y = np.load('y_prepared.npy')
    print (f"X shape: {X.shape}")
    print (f"y shape: {y.shape}")
    classes = np.load('label_encoder_classes.npy', allow_pickle=True)
    return X, y, classes

def evaluate_model(model, X_train, X_test, y_train, y_test, classes, model_name):
    # train
    model.fit(X_train, y_train)

    # pred
    y_pred = model.predict(X_test)

    # calidad metricas
    cv_scores = cross_val_score(model, X_train, y_train, cv=5)
    cm = confusion_matrix(y_test, y_pred)

    # matriz conf
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=classes, yticklabels=classes)
    plt.title(f'Confusion Matrix - {model_name}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f'part2/{model_name.lower().replace(" ", "_")}_confusion_matrix.png')
    plt.close()

    report = classification_report(y_test, y_pred, target_names=classes)

    joblib.dump(model, f'part2/{model_name.lower().replace(" ", "_")}_model.pkl')

    return {
        'cv_scores': cv_scores,
        'mean_cv_score': cv_scores.mean(),
        'std_cv_score': cv_scores.std(),
        'report': report
    }

def main():
    os.makedirs('part2', exist_ok=True)

    print("Loading prepared dataset...")
    X, y, classes = load_prepared_data()

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    models = {
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
        'SVM': SVC(kernel='rbf', probability=True, random_state=42),
        'KNN': KNeighborsClassifier(n_neighbors=5),
        #'Neural Network': MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42),
        #'Compressed Neural Network': MLPClassifier(hidden_layer_sizes=(50,), max_iter=300, random_state=42)
    }

    # Ensamble learning
    ensemble = VotingClassifier(estimators=[
        ('rf', models['Random Forest']),
        ('svm', models['SVM']),
        ('knn', models['KNN'])
    ], voting='soft')
    models['Voting Ensemble'] = ensemble

    results = {}
    for model_name, model in models.items():
        print(f"\nEvaluating {model_name}...")
        results[model_name] = evaluate_model(
            model, X_train, X_test, y_train, y_test, classes, model_name
        )

        print(f"\n{model_name} Results:")
        print(f"Cross-validation scores: {results[model_name]['cv_scores']}")
        print(f"Mean CV score: {results[model_name]['mean_cv_score']:.3f} (\u00b1{results[model_name]['std_cv_score']:.3f})")
        print("\nClassification Report:")
        print(results[model_name]['report'])

    # Guardar resultados para reproducibilidad
    results_json = {}
    for model_name in results:
        results_json[model_name] = {
            'mean_cv_score': results[model_name]['mean_cv_score'],
            'std_cv_score': results[model_name]['std_cv_score'],
            'classification_report': results[model_name]['report']
        }

    with open('part2/experiment_results.json', 'w') as f:
        json.dump(results_json, f, indent=4)

    # Comparar Models
    plt.figure(figsize=(10, 6))
    model_names = list(results.keys())
    cv_means = [results[model]['mean_cv_score'] for model in model_names]
    cv_stds = [results[model]['std_cv_score'] for model in model_names]

    plt.bar(model_names, cv_means, yerr=cv_stds, capsize=5)
    plt.title('Model Performance Comparison')
    plt.ylabel('Cross-validation Score')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('part2/model_comparison.png')
    plt.close()

if __name__ == "__main__":
    main()


Loading prepared dataset...
X shape: (49598, 46)
y shape: (49598,)


FileNotFoundError: [Errno 2] No such file or directory: 'label_encoder_classes.npy'