In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error

In [2]:
# Load the datasets
train_data = pd.read_csv("/kaggle/input/playground-series-s4e5/train.csv")
test_data = pd.read_csv("/kaggle/input/playground-series-s4e5/test.csv")


In [3]:
# Extract features and target variable from training data
X_train = train_data.drop(columns=['id', 'FloodProbability'])
y_train = train_data['FloodProbability']

# Extract features from test data
X_test = test_data.drop(columns=['id'])

In [4]:
# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [5]:
# Hyperparameter tuning for MLP model
parameter_space = {
    'hidden_layer_sizes': [(50,), (100,)],
    'activation': ['relu', 'tanh'],
    'alpha': [0.0001, 0.01],
    'learning_rate': ['constant', 'adaptive']
}

mlp_model = MLPRegressor(max_iter=200, random_state=42)
clf = GridSearchCV(mlp_model, parameter_space, cv=2, scoring='neg_mean_squared_error', verbose=2)
clf.fit(X_train_scaled, y_train)

Fitting 2 folds for each of 16 candidates, totalling 32 fits
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant; total time=  50.8s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=constant; total time=  47.5s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive; total time=  51.2s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(50,), learning_rate=adaptive; total time=  53.3s
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant; total time= 1.1min
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=constant; total time= 1.1min
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive; total time= 1.1min
[CV] END activation=relu, alpha=0.0001, hidden_layer_sizes=(100,), learning_rate=adaptive; total time= 1.1min
[CV] END activation=relu, alpha=0.01, hidden_layer_sizes=(50,),

In [6]:
# Get the best parameters from GridSearchCV
best_parameters = clf.best_params_
print("Best parameters found: ", best_parameters)


Best parameters found:  {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': (100,), 'learning_rate': 'constant'}


In [7]:
# Train the best model
best_mlp_model = clf.best_estimator_
best_mlp_model.fit(X_train_scaled, y_train)


In [8]:
# Evaluate performance on training data
y_train_pred_mlp = best_mlp_model.predict(X_train_scaled)
mlp_train_mse = mean_squared_error(y_train, y_train_pred_mlp)
print(f'MLP Train MSE: {mlp_train_mse}')


MLP Train MSE: 0.00037139523118260296


In [9]:
# Predict on test data using the best MLP model
y_test_pred_mlp = best_mlp_model.predict(X_test_scaled)


In [10]:
# Prepare submission file
submission = pd.DataFrame({
    'id': test_data['id'],
    'FloodProbability': y_test_pred_mlp
})
submission.to_csv('submission.csv', index=False)

print("Submission file saved to 'submission.csv'.")

Submission file saved to 'submission.csv'.
