## MLP Neural Network - Hotel Booking Cancellation Prediction

In [1]:
#Importing Libraries

import pandas as pd
import numpy as np

from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    accuracy_score,
    roc_auc_score,
    confusion_matrix,
    classification_report
)

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import GridSearchCV

In [2]:
#Loading processed features and target

X_train = pd.read_csv("X_train_processed.csv")
X_test  = pd.read_csv("X_test_processed.csv")
y_train = pd.read_csv("y_train.csv").squeeze()
y_test  = pd.read_csv("y_test.csv").squeeze()

print("X_train shape:", X_train.shape)
print("X_test shape: ", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape: ", y_test.shape)

print("\nDtypes of first few columns:")
print(X_train.dtypes.head())

X_train shape: (95512, 231)
X_test shape:  (23878, 231)
y_train shape: (95512,)
y_test shape:  (23878,)

Dtypes of first few columns:
0    float64
1    float64
2    float64
3    float64
4    float64
dtype: object


In [3]:
#Handling class imbalance using SMOTE

smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

print("Before SMOTE:\n", y_train.value_counts())
print("\nAfter SMOTE:\n", y_train_balanced.value_counts())

Before SMOTE:
 is_canceled
0    60259
1    35253
Name: count, dtype: int64

After SMOTE:
 is_canceled
1    60259
0    60259
Name: count, dtype: int64


In [4]:
USE_SMOTE = True

if USE_SMOTE:
    X_train_mlp = X_train_balanced
    y_train_mlp = y_train_balanced
else:
    X_train_mlp = X_train
    y_train_mlp = y_train

In [5]:
#MLP Neural Model

mlp = MLPClassifier(
    hidden_layer_sizes=(64,),
    activation="relu",
    solver="adam",
    alpha=0.0001,
    learning_rate="adaptive",
    max_iter=50,
    early_stopping=True,
    n_iter_no_change=5,
    random_state=42
)

#Training the model
mlp.fit(X_train_mlp, y_train_mlp)

#Predict on test data
y_pred = mlp.predict(X_test)
y_proba = mlp.predict_proba(X_test)[:, 1]

#Evaluation
print("MLP Neural Network - Baseline Results")
print("Accuracy :", accuracy_score(y_test, y_pred))
print("ROC-AUC  :", roc_auc_score(y_test, y_proba))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

MLP Neural Network - Baseline Results
Accuracy : 0.8463439149007455
ROC-AUC  : 0.9346041731914401

Confusion Matrix:
 [[12444  2463]
 [ 1206  7765]]

Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.83      0.87     14907
           1       0.76      0.87      0.81      8971

    accuracy                           0.85     23878
   macro avg       0.84      0.85      0.84     23878
weighted avg       0.85      0.85      0.85     23878



In [6]:
#Hyperparameter Tuning

param_grid = {
    "hidden_layer_sizes": [(32,), (64,), (128,), (64, 32)],
    "alpha": [0.0001, 0.001, 0.01]
}

mlp_base = MLPClassifier(
    activation="relu",
    solver="adam",
    learning_rate="adaptive",
    max_iter=50,
    early_stopping=True,
    n_iter_no_change=5,
    random_state=42
)

grid_search_mlp = GridSearchCV(
    estimator=mlp_base,
    param_grid=param_grid,
    cv=3,
    scoring="roc_auc",
    n_jobs=-1,
    verbose=2
)

grid_search_mlp.fit(X_train_mlp, y_train_mlp)

print("Best parameters:", grid_search_mlp.best_params_)
print("Best CV ROC-AUC:", grid_search_mlp.best_score_)


Fitting 3 folds for each of 12 candidates, totalling 36 fits
Best parameters: {'alpha': 0.0001, 'hidden_layer_sizes': (64, 32)}
Best CV ROC-AUC: 0.9377070994016


In [7]:
#Best model

best_mlp = grid_search_mlp.best_estimator_

y_pred_best = best_mlp.predict(X_test)
y_proba_best = best_mlp.predict_proba(X_test)[:, 1]

print("Best MLP Neural Network - Test Results")
print("Accuracy :", accuracy_score(y_test, y_pred_best))
print("ROC-AUC  :", roc_auc_score(y_test, y_proba_best))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred_best))
print("\nClassification Report:\n", classification_report(y_test, y_pred_best))

print(
    f"\nFinal chosen MLP model: {best_mlp}\n"
    f"Test Accuracy: {accuracy_score(y_test, y_pred_best):.4f}, "
    f"Test ROC-AUC: {roc_auc_score(y_test, y_proba_best):.4f}"
)

Best MLP Neural Network - Test Results
Accuracy : 0.8522489320713628
ROC-AUC  : 0.9344063876373875

Confusion Matrix:
 [[12877  2030]
 [ 1498  7473]]

Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.86      0.88     14907
           1       0.79      0.83      0.81      8971

    accuracy                           0.85     23878
   macro avg       0.84      0.85      0.84     23878
weighted avg       0.85      0.85      0.85     23878


Final chosen MLP model: MLPClassifier(early_stopping=True, hidden_layer_sizes=(64, 32),
              learning_rate='adaptive', max_iter=50, n_iter_no_change=5,
              random_state=42)
Test Accuracy: 0.8522, Test ROC-AUC: 0.9344
