In [1]:
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras


In [7]:
hotel = pd.read_csv('hotel.csv')
hotel.drop(hotel.columns[0],axis=1, inplace=True)

In [10]:
hotel.describe()

Unnamed: 0,is_canceled,lead_time,arrival_date_year,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,is_repeated_guest,previous_cancellations,previous_bookings_not_canceled,booking_changes,agent,company,days_in_waiting_list,adr,required_car_parking_spaces,total_of_special_requests
count,119390.0,119390.0,119390.0,119390.0,119390.0,119390.0,119390.0,119390.0,119386.0,119390.0,119390.0,119390.0,119390.0,119390.0,103050.0,6797.0,119390.0,119390.0,119390.0,119390.0
mean,0.370416,104.011416,2016.156554,27.165173,15.798241,0.927599,2.500302,1.856403,0.10389,0.007949,0.031912,0.087118,0.137097,0.221124,86.693382,189.266735,2.321149,101.831122,0.062518,0.571363
std,0.482918,106.863097,0.707476,13.605138,8.780829,0.998613,1.908286,0.579261,0.398561,0.097436,0.175767,0.844336,1.497437,0.652306,110.774548,131.655015,17.594721,50.53579,0.245291,0.792798
min,0.0,0.0,2015.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,-6.38,0.0,0.0
25%,0.0,18.0,2016.0,16.0,8.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,62.0,0.0,69.29,0.0,0.0
50%,0.0,69.0,2016.0,28.0,16.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,179.0,0.0,94.575,0.0,0.0
75%,1.0,160.0,2017.0,38.0,23.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,229.0,270.0,0.0,126.0,0.0,1.0
max,1.0,737.0,2017.0,53.0,31.0,19.0,50.0,55.0,10.0,10.0,1.0,26.0,72.0,21.0,535.0,543.0,391.0,5400.0,8.0,5.0


In [11]:
hotel.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119390 entries, 0 to 119389
Data columns (total 32 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   hotel                           119390 non-null  object 
 1   is_canceled                     119390 non-null  int64  
 2   lead_time                       119390 non-null  int64  
 3   arrival_date_year               119390 non-null  int64  
 4   arrival_date_month              119390 non-null  object 
 5   arrival_date_week_number        119390 non-null  int64  
 6   arrival_date_day_of_month       119390 non-null  int64  
 7   stays_in_weekend_nights         119390 non-null  int64  
 8   stays_in_week_nights            119390 non-null  int64  
 9   adults                          119390 non-null  int64  
 10  children                        119386 non-null  float64
 11  babies                          119390 non-null  int64  
 12  meal            

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline

In [13]:
X = hotel.copy()
y = hotel.pop('is_canceled')

In [15]:
X['arrival_date_month'] = \
                    X['arrival_date_month'].map({'January':1, 'February': 2, 'March':3,
         'April':4, 'May':5, 'June':6, 'July':7,
         'August':8, 'September':9, 'October':10,
         'November':11, 'December':12})

In [17]:
feature_num = ["lead_time", "arrival_date_week_number",
    "arrival_date_day_of_month", "stays_in_weekend_nights",
    "stays_in_week_nights", "adults", "children", "babies",
    "is_repeated_guest", "previous_cancellations",
    "previous_bookings_not_canceled", "required_car_parking_spaces",
    "total_of_special_requests", "adr"]

feature_cat = ["hotel", "arrival_date_month", "meal",
    "market_segment", "distribution_channel",
    "reserved_room_type", "deposit_type", "customer_type"]

In [20]:
X['lead_time'].describe()

count    119390.000000
mean        104.011416
std         106.863097
min           0.000000
25%          18.000000
50%          69.000000
75%         160.000000
max         737.000000
Name: lead_time, dtype: float64

In [23]:
X['market_segment'].value_counts()

Online TA        56477
Offline TA/TO    24219
Groups           19811
Direct           12606
Corporate         5295
Complementary      743
Aviation           237
Undefined            2
Name: market_segment, dtype: int64

In [26]:
transformer_num = make_pipeline(
    SimpleImputer(strategy='constant'),
    StandardScaler(),
)

In [27]:
transformer_cat = make_pipeline(
    SimpleImputer(strategy="constant", fill_value='NA'),
    OneHotEncoder(handle_unknown='ignore'),

)

In [28]:
preprocessor = make_column_transformer(
                (transformer_num,feature_num),
                (transformer_cat,feature_cat),
)

In [29]:
preprocessor

ColumnTransformer(transformers=[('pipeline-1',
                                 Pipeline(steps=[('simpleimputer',
                                                  SimpleImputer(strategy='constant')),
                                                 ('standardscaler',
                                                  StandardScaler())]),
                                 ['lead_time', 'arrival_date_week_number',
                                  'arrival_date_day_of_month',
                                  'stays_in_weekend_nights',
                                  'stays_in_week_nights', 'adults', 'children',
                                  'babies', 'is_repeated_guest',
                                  'previous_cancellations',
                                  'previ...
                                  'required_car_parking_spaces',
                                  'total_of_special_requests', 'adr']),
                                ('pipeline-2',
                            

In [30]:
X_train, X_valid, y_train, y_valid = train_test_split(X,y,train_size=0.70)

In [31]:
X_train.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
17101,Resort Hotel,0,262,2015,9,39,21,1,3,2,...,No Deposit,183.0,,0,Transient-Party,89.2,0,0,Check-Out,2015-09-25
58155,City Hotel,1,443,2016,10,41,7,0,2,2,...,Non Refund,1.0,,391,Transient,65.0,0,0,Canceled,2016-09-20
119049,City Hotel,0,61,2017,8,35,27,1,0,2,...,No Deposit,85.0,,0,Transient,90.6,0,1,Check-Out,2017-08-28
80450,City Hotel,1,34,2015,12,50,8,0,2,1,...,Non Refund,19.0,,0,Transient,90.0,0,0,Canceled,2015-11-17
114339,City Hotel,0,35,2017,6,24,13,2,5,2,...,No Deposit,9.0,,0,Transient,196.14,0,1,Check-Out,2017-06-20


In [32]:
y_train.head()

17101     0
58155     1
119049    0
80450     1
114339    0
Name: is_canceled, dtype: int64

In [33]:
X_train = preprocessor.fit_transform(X_train)

In [37]:
X_valid = preprocessor.fit_transform(X_valid)

In [38]:
input_shape = [X_train.shape[1]]

In [39]:
input_shape

[63]

In [40]:
from tensorflow.keras import layers

In [None]:
model = keras.Sequential([
        # First layer is Batch Normalization
        layers.BatchNormalization(input_shape=input_shape),
        # Second layer is Dense 
        layers.Dense(units=256,activation='relu'),
        # Third layer of Batch Normalization
        layers.BatchNormalization(),
        # Fourth layer of droput
        layers.Dropout(0.3),
        # 
    
])