In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, mean_squared_error
import os

# Both CSV files live in "ejercicio_10", located under the parent of the
# current working directory.
data_dir = os.path.join(os.path.dirname(os.path.abspath('')), 'ejercicio_10')
classification_path = os.path.join(data_dir, 'titanic_proc.csv')
regression_path = os.path.join(data_dir, 'targets.csv')

# Load the datasets
classification_data = pd.read_csv(classification_path)
regression_data = pd.read_csv(regression_path)

# Classification dataset (Titanic): "Survived" is the target column
X_class = classification_data.drop('Survived', axis=1)
y_class = classification_data['Survived']

# Regression dataset (Targets): "rotation" is the target column
X_reg = regression_data.drop('rotation', axis=1)
y_reg = regression_data['rotation']

# One-hot encode any categorical feature columns
X_class = pd.get_dummies(X_class)
X_reg = pd.get_dummies(X_reg)

# Hold out 30% of each dataset for testing; the fixed seed keeps the
# partition identical between runs.
split_options = {'test_size': 0.3, 'random_state': 42}
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class, y_class, **split_options
)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, **split_options
)

# Define the pipelines for both problems.
# Every pipeline standardizes the features and then fits one estimator; the
# name of the second step is the prefix used in the hyperparameter grids
# (e.g. "svm__C").
# The random forest, gradient boosting and MLP estimators are stochastic, so
# they are seeded with the same value as the train/test split to make the
# reported scores and selected hyperparameters reproducible between runs.
pipelines = {
    # --- Classification ---
    'svm_class_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('svm', SVC(kernel='rbf'))
    ]),
    'random_forest_class_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('random_forest', RandomForestClassifier(random_state=42))
    ]),
    'gradient_boosting_class_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('gbc', GradientBoostingClassifier(random_state=42))
    ]),
    'mlp_class_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('mlp', MLPClassifier(max_iter=1000, random_state=42))
    ]),
    # --- Regression ---
    'random_forest_reg_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('random_forest', RandomForestRegressor(random_state=42))
    ]),
    'gradient_boosting_reg_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('gbr', GradientBoostingRegressor(random_state=42))
    ]),
    'mlp_reg_pipeline': Pipeline([
        ('scaler', StandardScaler()),
        ('mlp', MLPRegressor(max_iter=1000, random_state=42))
    ])
}

# Hyperparameter grids for the models, keyed by pipeline name.
def _step_grid(step, **candidates):
    """Return a grid whose keys are ``<step>__<param>`` for each candidate list."""
    return {f'{step}__{param}': values for param, values in candidates.items()}


param_grids = {
    'svm_class_pipeline': _step_grid(
        'svm', C=[0.1, 1, 10], gamma=[0.01, 0.1, 1]
    ),
    'random_forest_class_pipeline': _step_grid(
        'random_forest', n_estimators=[100, 200], max_depth=[10, 20, None]
    ),
    'gradient_boosting_class_pipeline': _step_grid(
        'gbc', n_estimators=[100, 200], learning_rate=[0.01, 0.1, 0.2]
    ),
    'mlp_class_pipeline': _step_grid(
        'mlp', hidden_layer_sizes=[(50,), (100,)], alpha=[0.0001, 0.001]
    ),
    'random_forest_reg_pipeline': _step_grid(
        'random_forest', n_estimators=[100, 200], max_depth=[10, 20, None]
    ),
    'gradient_boosting_reg_pipeline': _step_grid(
        'gbr', n_estimators=[100, 200], learning_rate=[0.01, 0.1, 0.2]
    ),
    'mlp_reg_pipeline': _step_grid(
        'mlp', hidden_layer_sizes=[(50,), (100,)], alpha=[0.0001, 0.001]
    ),
}

# Run GridSearchCV for every pipeline. Pipelines whose name contains "class"
# are fitted on the classification split; all others on the regression split.
results = {}
for name, pipeline in pipelines.items():
    # 5-fold cross-validated search over the grid registered under this name.
    grid_search = GridSearchCV(pipeline, param_grids[name], cv=5, n_jobs=-1)

    if 'class' not in name:
        # Regression: report the mean squared error on the held-out test set.
        grid_search.fit(X_train_reg, y_train_reg)
        y_pred = grid_search.best_estimator_.predict(X_test_reg)
        mse = mean_squared_error(y_test_reg, y_pred)
        results[name] = dict(best_params=grid_search.best_params_, mse=mse)
        print(f"Mean Squared Error ({name}):", mse)
        continue

    # Classification: report accuracy and the per-class report on the test set.
    grid_search.fit(X_train_class, y_train_class)
    y_pred = grid_search.best_estimator_.predict(X_test_class)
    accuracy = accuracy_score(y_test_class, y_pred)
    results[name] = dict(best_params=grid_search.best_params_, accuracy=accuracy)
    print(f"Accuracy ({name}):", accuracy)
    print(classification_report(y_test_class, y_pred))

# Show the final results: best parameters and test metric per pipeline
print("\nResultados finales:")
for pipeline_name, summary in results.items():
    print(f"{pipeline_name}: {summary}")

KeyError: "['target'] not found in axis"