In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report, matthews_corrcoef, roc_auc_score

In [4]:
# Load the prepared dataset into a DataFrame. Later cells expect it to contain
# a "booking_status" column, which is used as the label.
# NOTE(review): this is an absolute, machine-specific path — consider reading
# it from a configurable data directory so the notebook runs elsewhere.
data = pd.read_csv("/home/cloud/Datasets/Hotel Reservations_data_features.csv")

## Split the data into train and test sets

In [6]:
def train_test_split_only(
    data,
    *,
    target="booking_status",
    test_size=0.20,
    random_state=0,
):
    """Split a DataFrame into stratified train/test features and labels.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding the feature columns plus the ``target`` column.
    target : str, default "booking_status"
        Name of the label column. It is removed from the feature matrix and
        used both as ``y`` and as the stratification key.
    test_size : float or int, default 0.20
        Forwarded unchanged to ``train_test_split``.
    random_state : int, default 0
        Forwarded unchanged to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, features)`` where ``features`` is
        the list of feature column names in their original column order.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``data``.
    """
    # Fail early with an actionable message instead of a bare KeyError.
    if target not in data.columns:
        raise KeyError(
            f"target column {target!r} not found in data; "
            f"available columns: {list(data.columns)}"
        )

    y = data[target]
    x = data.drop(columns=target)
    features = list(x.columns)

    # Stratifying on y keeps the class proportions the same in both splits.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=random_state, stratify=y
    )
    return x_train, x_test, y_train, y_test, features

In [8]:
# Build the stratified hold-out split once; `features` keeps the feature
# column names (everything except the label) in their original order.
x_train, x_test, y_train, y_test, features = train_test_split_only(data)

## Fit and evaluate the model

In [12]:
def fit_and_evaluate_decision_tree(x_train, x_test, y_train, y_test):
    """Fit a depth-limited decision tree and print its test-set metrics.

    The classifier is trained on ``(x_train, y_train)`` and then scored on
    ``(x_test, y_test)``. The confusion matrix, accuracy, ROC AUC, Matthews
    correlation coefficient and the classification report are printed in
    that order.

    Returns
    -------
    DecisionTreeClassifier
        The fitted estimator.
    """
    model = DecisionTreeClassifier(
        max_depth=8,
        min_samples_leaf=10,
        min_samples_split=20,
        random_state=42,
    ).fit(x_train, y_train)

    predicted_labels = model.predict(x_test)
    # Column 1 of predict_proba is used as the score for the AUC computation.
    positive_scores = model.predict_proba(x_test)[:, 1]

    # Each metric is computed immediately before it is printed so the output
    # appears incrementally, one metric at a time.
    cm = confusion_matrix(y_test, predicted_labels)
    print("Confusion Matrix:\n", cm, '\n')

    accuracy_pct = accuracy_score(y_test, predicted_labels) * 100
    print(f"Accuracy: {accuracy_pct:.2f}%", '\n')

    auc_pct = roc_auc_score(y_test, positive_scores) * 100
    print(f"AUC Score: {auc_pct:.2f}%", '\n')

    mcc_pct = matthews_corrcoef(y_test, predicted_labels) * 100
    print(f"MCC Score: {mcc_pct:.2f}%", '\n')

    report = classification_report(y_test, predicted_labels)
    print(report)

    return model

In [14]:
# Train the tree on the training split, print the hold-out metrics, and keep
# the fitted estimator so it can be persisted afterwards.
dt_model = fit_and_evaluate_decision_tree(x_train, x_test, y_train, y_test)

Confusion Matrix:
 [[4432  446]
 [ 587 1790]] 

Accuracy: 85.76% 

AUC Score: 91.92% 

MCC Score: 67.25% 

              precision    recall  f1-score   support

           0       0.88      0.91      0.90      4878
           1       0.80      0.75      0.78      2377

    accuracy                           0.86      7255
   macro avg       0.84      0.83      0.84      7255
weighted avg       0.86      0.86      0.86      7255



In [18]:
import joblib

# Persist the fitted tree together with the ordered feature names so that
# whoever loads the file can line up input columns the way training did.
# NOTE(review): the destination is an absolute, machine-specific path.
joblib.dump(
    dict(model=dt_model, features=features),
    "/home/cloud/ML_Models/decision_tree.pkl",
)

['/home/cloud/ML_Models/decision_tree.pkl']