In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# ----------------------------
# 1. Configuration and paths
# ----------------------------
# Input: preprocessed dataset splits (CSV files).
TRAIN_PATH = 'train_preprocesado.csv'
VAL_PATH = 'val_preprocesado.csv'
TEST_PATH = 'test_preprocesado.csv'

# Training hyper-parameters.
RANDOM_STATE = 42
BATCH_SIZE = 256
MAX_ITER = 200
PATIENCE = 50  # early-stopping patience (epochs without improvement)
LEARNING_RATE = 1e-3
ALPHA = 1e-4  # L2 regularization strength

# Output: directory for model artifacts and the submission file inside it.
MODEL_DIR = 'models_nn'
SUBMIT_FILE = os.path.join(MODEL_DIR, 'submission_mlp.csv')
os.makedirs(MODEL_DIR, exist_ok=True)  # no error if it already exists

# ----------------------------
# 2. Data loading
# ----------------------------
df_train = pd.read_csv(TRAIN_PATH)
df_val   = pd.read_csv(VAL_PATH)
df_test  = pd.read_csv(TEST_PATH)

# Row identifiers (kept aside; they are not model inputs)
train_ids = df_train['id'].values
val_ids   = df_val['id'].values
test_ids  = df_test['id'].values  # needed to build the submission

# Targets
y_train = df_train['prezo_euros'].values
y_val   = df_val['prezo_euros'].values

# The training frame defines the feature set and its column order.
X_train  = df_train.drop(columns=['id', 'prezo_euros'])
FEATURES = X_train.columns.tolist()

# Fail early, with a readable message, if a split lacks any training feature.
for split_name, split_df in (('validation', df_val), ('test', df_test)):
    missing = [col for col in FEATURES if col not in split_df.columns]
    if missing:
        raise ValueError(
            f"{split_name} set is missing feature columns: {missing}"
        )

# Select by FEATURES (instead of dropping columns) so that validation and test
# reach the scaler/model with exactly the same columns, in the same order, as
# the training data, regardless of how each CSV happens to be laid out.
X_val  = df_val[FEATURES]
X_test = df_test[FEATURES]

# ----------------------------
# 3. Feature scaling
# ----------------------------
# Fit the scaler on the training split only, then apply that same fitted
# transformation to every split (train, validation and test).
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

# ----------------------------
# 4. MLP definition and training
# ----------------------------
# Hyper-parameters gathered in one place; values come from the config section.
mlp_params = {
    'hidden_layer_sizes': (1024, 512, 256, 128, 64, 32),
    'activation': 'relu',
    'solver': 'adam',
    'alpha': ALPHA,
    'batch_size': BATCH_SIZE,
    'learning_rate_init': LEARNING_RATE,
    'max_iter': MAX_ITER,
    # Early stopping: 10% of the rows passed to fit() are held out and
    # training stops after PATIENCE epochs without improvement on them.
    'early_stopping': True,
    'n_iter_no_change': PATIENCE,
    'validation_fraction': 0.1,
    'random_state': RANDOM_STATE,
    'verbose': True,
}
mlp = MLPRegressor(**mlp_params)

# Train; early stopping is handled internally by the estimator.
mlp.fit(X_train_scaled, y_train)

# Evaluate on the separate (external) validation split.
val_preds = mlp.predict(X_val_scaled)
val_mse = mean_squared_error(y_val, val_preds)
val_rmse = np.sqrt(val_mse)
print(f"Validation RMSE: {val_rmse:.2f} euros")

Iteration 1, loss = 34938836107.53905487
Validation score: 0.478656
Iteration 2, loss = 3960924759.34912872
Validation score: 0.774619
Iteration 3, loss = 2651605649.88038826
Validation score: 0.801740
Iteration 4, loss = 2458288540.36548090
Validation score: 0.811688
Iteration 5, loss = 2372195227.22597933
Validation score: 0.820406
Iteration 6, loss = 2312913372.10492182
Validation score: 0.824044
Iteration 7, loss = 2270682123.72034883
Validation score: 0.827731
Iteration 8, loss = 2223485673.31484509
Validation score: 0.828408
Iteration 9, loss = 2195353403.45056915
Validation score: 0.829938
Iteration 10, loss = 2165544193.58049822
Validation score: 0.833200
Iteration 11, loss = 2142116242.56525445
Validation score: 0.837120
Iteration 12, loss = 2111629543.21717739
Validation score: 0.838476
Iteration 13, loss = 2091940614.18797517
Validation score: 0.839347
Iteration 14, loss = 2078437562.79536057
Validation score: 0.841795
Iteration 15, loss = 2068923005.42436886
Validation scor

In [3]:
# ----------------------------
# 5. Retrain on train+val for the submission
# ----------------------------
# Stack the already-scaled train and validation rows into one training set.
X_full  = np.vstack((X_train_scaled, X_val_scaled))
y_full  = np.concatenate((y_train, y_val))

# The final model is fitted on every labelled row, so nothing is held out for
# early stopping here. Instead of always spending the full MAX_ITER budget,
# reuse the epoch at which the early-stopped model's held-out score peaked.
# Falls back to MAX_ITER when `mlp` recorded no validation scores
# (e.g. it was fitted with early_stopping=False).
# NOTE(review): the combined set is larger, so the same number of epochs means
# somewhat more gradient updates; treat the cap as a heuristic.
val_scores = getattr(mlp, 'validation_scores_', None)
if val_scores is not None and len(val_scores) > 0:
    full_max_iter = int(np.argmax(val_scores)) + 1  # epochs are 1-based
else:
    full_max_iter = MAX_ITER

# Same architecture and optimizer settings as `mlp`, read back from it.
# Reaching `max_iter` is the intended stopping rule here, so a convergence
# warning from scikit-learn at the end of fit() is expected.
mlp_full = MLPRegressor(
    hidden_layer_sizes=mlp.hidden_layer_sizes,
    activation=mlp.activation,
    solver=mlp.solver,
    alpha=mlp.alpha,
    batch_size=mlp.batch_size,
    learning_rate_init=mlp.learning_rate_init,
    max_iter=full_max_iter,
    random_state=RANDOM_STATE,
    verbose=False
)
mlp_full.fit(X_full, y_full)



MLPRegressor(batch_size=256, hidden_layer_sizes=(1024, 512, 256, 128, 64, 32),
             random_state=42)

In [None]:
# ----------------------------
# 6. Build the submission file
# ----------------------------
# Predict on the scaled test features with the model trained on train+val.
test_preds = mlp_full.predict(X_test_scaled)

# Two columns: the test row id and its predicted price.
submission = pd.DataFrame({'id': test_ids}).assign(prezo_euros=test_preds)

# Write without the DataFrame index so the CSV holds only the two columns.
submission.to_csv(SUBMIT_FILE, index=False)
print(f"Submission guardada en {SUBMIT_FILE}")


Epoch 001 - Val RMSE: 54619.51
Epoch 002 - Val RMSE: 55197.31
Epoch 003 - Val RMSE: 55594.94
Epoch 004 - Val RMSE: 55329.61
Epoch 005 - Val RMSE: 54505.30
Epoch 006 - Val RMSE: 56881.05
Epoch 007 - Val RMSE: 55397.27
Epoch 008 - Val RMSE: 56504.47
Epoch 009 - Val RMSE: 54769.78
Epoch 010 - Val RMSE: 54590.26
Epoch 011 - Val RMSE: 59259.42
Epoch 012 - Val RMSE: 55069.93
Epoch 013 - Val RMSE: 56248.40
Epoch 014 - Val RMSE: 54399.22
Epoch 015 - Val RMSE: 56260.50
Epoch 016 - Val RMSE: 54956.83
Epoch 017 - Val RMSE: 57410.40
Epoch 018 - Val RMSE: 55765.74
Epoch 019 - Val RMSE: 54329.05
Epoch 020 - Val RMSE: 55389.84
Epoch 021 - Val RMSE: 54777.33
Epoch 022 - Val RMSE: 54487.06
Epoch 023 - Val RMSE: 55709.44
Epoch 024 - Val RMSE: 54596.14
Epoch 025 - Val RMSE: 56462.93
Epoch 026 - Val RMSE: 55470.80
Epoch 027 - Val RMSE: 54476.43
Epoch 028 - Val RMSE: 55092.89
Epoch 029 - Val RMSE: 58373.31
Epoch 030 - Val RMSE: 54480.88
Epoch 031 - Val RMSE: 55843.06
Epoch 032 - Val RMSE: 54518.32
Epoch 03