In [41]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from keras.layers import Dropout
import seaborn as sns
import matplotlib.pyplot as plt


In [14]:
# Load train and test data
train_data = pd.read_csv("hotel_train.csv")
test_data = pd.read_csv("hotel_test.csv")


In [26]:
#Load data for modeling
model_data = pd.read_csv("hotel_modeling.csv")

# Create Dummy Variables for modeling

In [27]:
# Create dummy variables for 'customer_type' and 'deposit_type'
dummy_columns = ['hotel', 'arrival_date_month', 'meal', 'country', 'market_segment', 'distribution_channel', 'reserved_room_type', 'assigned_room_type'
                ,'deposit_type', 'customer_type']
dummy_df = pd.get_dummies(model_data[dummy_columns], dtype = int)

# Concatenate the dummy variables with the original DataFrame
model_data_wd = pd.concat([model_data, dummy_df], axis=1)

#drop the original columns
model_data_wd.drop(columns=dummy_columns, inplace=True)

print(pd.DataFrame.head(model_data_wd))

   is_canceled  lead_time  stays_in_weekend_nights  stays_in_week_nights  \
0            0        342                        0                     0   
1            0        737                        0                     0   
2            0          7                        0                     1   
3            0         13                        0                     1   
4            0         14                        0                     2   

   adults  children  babies  is_repeated_guest  previous_cancellations  \
0       2       0.0       0                  0                       0   
1       2       0.0       0                  0                       0   
2       1       0.0       0                  0                       0   
3       1       0.0       0                  0                       0   
4       2       0.0       0                  0                       0   

   previous_bookings_not_canceled  ...  assigned_room_type_K  \
0                               0 

# Split data for training / testing

In [64]:
X = model_data_wd.drop(columns=['is_canceled'])  # Features
y = model_data_wd['is_canceled']                 # Target variable

#Splitting the dataset into training and testing sets (70% training, 30% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Print the shapes of the resulting datasets
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

Shape of X_train: (83573, 252)
Shape of X_test: (35817, 252)
Shape of y_train: (83573,)
Shape of y_test: (35817,)
        lead_time  stays_in_weekend_nights  stays_in_week_nights  adults  \
9710          122                        0                     4       2   
40307          20                        0                     2       2   
110051         87                        2                     3       3   
22999           0                        0                     1       1   
8344           42                        0                     1       2   
...           ...                      ...                   ...     ...   
76820           2                        0                     1       2   
110268         91                        2                     3       2   
103694         74                        0                     3       3   
860           130                        2                     5       2   
15795          16                        0        

# Neural Network

In [56]:
from keras.optimizers import Nadam
# Define the model
model_nn = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)), #Input Layer
   #Dropout(0.2),
    Dense(64, activation='relu'), #Hidden Layer
    #Dense(64, activation='relu'), #Hidden Layer
    #Dense(64, activation='relu'), #Hidden Layer
    #Dense(10, activation='softmax'), #Hidden Layer
    #Dropout(0.2),
    Dense(10, activation='sigmoid'), #Output Layer   
])
optimizer = Nadam(learning_rate=0.001)
# Compile the model
model_nn.compile(optimizer=optimizer,
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])

# Train the model
model_nn.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss_nn, test_acc_nn = model_nn.evaluate(X_test, y_test)
print('Neural Network - Test accuracy:', test_acc_nn)

Epoch 1/10
[1m2612/2612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6802 - loss: 0.8419 - val_accuracy: 0.6276 - val_loss: 0.6610
Epoch 2/10
[1m2612/2612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1ms/step - accuracy: 0.6273 - loss: 0.6612 - val_accuracy: 0.6276 - val_loss: 0.6605
Epoch 3/10
[1m2612/2612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6282 - loss: 0.6605 - val_accuracy: 0.6276 - val_loss: 0.6611
Epoch 4/10
[1m2612/2612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6285 - loss: 0.6604 - val_accuracy: 0.6276 - val_loss: 0.6624
Epoch 5/10
[1m2612/2612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6308 - loss: 0.6592 - val_accuracy: 0.6276 - val_loss: 0.6606
Epoch 6/10
[1m2612/2612[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.6325 - loss: 0.6580 - val_accuracy: 0.6276 - val_loss: 0.6604
Epoch 7/10
[1m2