# Hotel Cancellation Prediction

## Import Libraries

In [10]:
import pandas as pd
import os

from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

from modules.pipeline import init_local_pipeline
from modules.components import init_components

## Cleaning Dataset

In [3]:
# Load the raw reservation export and preview its first five rows so the
# columns can be inspected before any cleaning is applied.
df = pd.read_csv(filepath_or_buffer='dataset/hotel_reservation.csv')
df.head(n=5)

Unnamed: 0,Booking_ID,no_of_adults,no_of_children,no_of_weekend_nights,no_of_week_nights,type_of_meal_plan,required_car_parking_space,room_type_reserved,lead_time,arrival_year,arrival_month,arrival_date,market_segment_type,repeated_guest,no_of_previous_cancellations,no_of_previous_bookings_not_canceled,avg_price_per_room,no_of_special_requests,booking_status
0,INN00001,2,0,1,2,Meal Plan 1,0,Room_Type 1,224,2017,10,2,Offline,0,0,0,65.0,0,Not_Canceled
1,INN00002,2,0,2,3,Not Selected,0,Room_Type 1,5,2018,11,6,Online,0,0,0,106.68,1,Not_Canceled
2,INN00003,1,0,2,1,Meal Plan 1,0,Room_Type 1,1,2018,2,28,Online,0,0,0,60.0,0,Canceled
3,INN00004,2,0,0,2,Meal Plan 1,0,Room_Type 1,211,2018,5,20,Online,0,0,0,100.0,0,Canceled
4,INN00005,2,0,1,1,Not Selected,0,Room_Type 1,48,2018,4,11,Online,0,0,0,94.5,0,Canceled


In [9]:
# Re-read the raw file (decoded as latin-1) and discard the Booking_ID column
# in one chained expression, so `df` is rebuilt from disk every time this
# cell runs.
df = (
    pd.read_csv('dataset/hotel_reservation.csv', encoding='latin-1')
    .drop(columns=['Booking_ID'])
)

def encode_status(status):
    """Convert a booking status value into a binary label.

    Args:
        status: A single value from the ``booking_status`` column.

    Returns:
        0 when ``status`` equals ``'Canceled'``; 1 for every other value
        (including ``'Not_Canceled'`` and anything unexpected).
    """
    if status == 'Canceled':
        return 0
    return 1

# Encode the label only while it is still non-numeric, so re-running this on
# an already-encoded frame leaves it untouched. Testing "not numeric" rather
# than `dtype == object` also covers pandas' dedicated string dtypes, which
# would otherwise skip the encoding silently.
if not pd.api.types.is_numeric_dtype(df['booking_status']):
    df['booking_status'] = df['booking_status'].map(encode_status)

# DataFrame.to_csv does not create missing parent folders; make sure the
# output directory exists so the cell also succeeds on a fresh checkout.
os.makedirs('clean_dataset', exist_ok=True)
df.to_csv('clean_dataset/hotel_reservation_clean.csv', index=False, encoding='utf-8')
df.head()


Unnamed: 0,no_of_adults,no_of_children,no_of_weekend_nights,no_of_week_nights,type_of_meal_plan,required_car_parking_space,room_type_reserved,lead_time,arrival_year,arrival_month,arrival_date,market_segment_type,repeated_guest,no_of_previous_cancellations,no_of_previous_bookings_not_canceled,avg_price_per_room,no_of_special_requests,booking_status
0,2,0,1,2,Meal Plan 1,0,Room_Type 1,224,2017,10,2,Offline,0,0,0,65.0,0,1
1,2,0,2,3,Not Selected,0,Room_Type 1,5,2018,11,6,Online,0,0,0,106.68,1,1
2,1,0,2,1,Meal Plan 1,0,Room_Type 1,1,2018,2,28,Online,0,0,0,60.0,0,0
3,2,0,0,2,Meal Plan 1,0,Room_Type 1,211,2018,5,20,Online,0,0,0,100.0,0,0
4,2,0,1,1,Not Selected,0,Room_Type 1,48,2018,4,11,Online,0,0,0,94.5,0,0


## Set Variables

In [11]:
# --- Pipeline identity -------------------------------------------------------
PIPELINE_NAME = "hanhanhanny-pipeline"

# --- Inputs: cleaned data folder and the module files given to the pipeline --
DATA_ROOT = "clean_dataset"
TRANSFORM_MODULE_FILE = "modules/hotel_transform.py"
TRAINER_TUNER_MODULE_FILE = "modules/hotel_trainer_tuner.py"

# --- Outputs: everything is written below OUTPUT_BASE ------------------------
OUTPUT_BASE = "output"
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
serving_model_dir = os.path.join(OUTPUT_BASE, "serving_model")
# NOTE(review): metadata_path is not passed to any call visible in this
# notebook — confirm whether the pipeline module relies on it.
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

## Run Pipeline

In [13]:
# Gather the arguments for init_components in one place. The same module file
# is supplied for both the tuner and the training step.
component_args = {
    "data_dir": DATA_ROOT,
    "tuner_module": TRAINER_TUNER_MODULE_FILE,
    "transform_module": TRANSFORM_MODULE_FILE,
    "training_module": TRAINER_TUNER_MODULE_FILE,
    "training_steps": 5000,
    "eval_steps": 1000,
    "serving_model_dir": serving_model_dir,
}
components = init_components(**component_args)

# Build the local pipeline rooted at `pipeline_root` and execute it with the
# Beam DAG runner.
pipeline = init_local_pipeline(components, pipeline_root)
runner = BeamDagRunner()
runner.run(pipeline)

Trial 10 Complete [00h 00m 22s]
val_binary_accuracy: 0.7583906054496765

Best val_binary_accuracy So Far: 0.7710312604904175
Total elapsed time: 00h 04m 25s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


Results summary
Results in output\hanhanhanny-pipeline\Tuner\.system\executor_execution\13\.temp\13\hotel-cancellation-prediction
Showing 10 best trials
Objective(name="val_binary_accuracy", direction="max")

Trial 04 summary
Hyperparameters:
units: 16
learning_rate: 0.001
num_layers: 3
Score: 0.7710312604904175

Trial 09 summary
Hyperparameters:
units: 16
learning_rate: 0.01
num_layers: 1
Score: 0.7583906054496765

Trial 00 summary
Hyperparameters:
units: 16
learning_rate: 0.001
num_layers: 2
Score: 0.7465624809265137

Trial 02 summary
Hyperparameters:
units: 256
learning_rate: 0.01
num_layers: 1
Score: 0.6962031126022339

Trial 08 summary
Hyperparameters:
units: 16
learning_rate: 0.01
num_layers: 2
Score: 0.6961874961853027

Trial 06 summary
Hyperparameters:
units: 64
learning_rate: 0.01
num_layers: 3
Score: 0.695968747138977

Trial 01 summary
Hyperparameters:
units: 256
learning_rate: 0.01
num_layers: 3
Score: 0.6958125233650208

Trial 03 summary
Hyperparameters:
units: 256
learning



Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 type_of_meal_plan_xf (InputLay  [(None, 5)]         0           []                               
 er)                                                                                              
                                                                                                  
 room_type_reserved_xf (InputLa  [(None, 6)]         0           []                               
 yer)                                                                                             
                                                                                                  
 market_segment_type_xf (InputL  [(None, 6)]         0           []                               
 ayer)                                                                                      

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: output\hanhanhanny-pipeline\Trainer\model\14\Format-Serving\assets


INFO:tensorflow:Assets written to: output\hanhanhanny-pipeline\Trainer\model\14\Format-Serving\assets


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.




































Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
