In [1]:
import numpy as np
import pandas as pd

import random

from Helper import train_test_split, calculate_accuracy
from DecisionTreeAlgothm import decision_tree_algorithm, decision_tree_predictions

In [2]:
# Load the raw reservations, discard the row identifier (it carries no
# predictive signal) and expose the target column under the generic name
# "label" that the rest of the notebook refers to.
df = (
    pd.read_csv("Hotel-Reservations.csv")
    .drop(columns="Booking_ID")
    .rename(columns={"booking_status": "label"})
)

In [3]:
# Add the engineered columns in one pass:
#   date                   - arrival date assembled from its year/month/day parts;
#                            errors="coerce" yields NaT where the parts do not
#                            form a valid calendar date
#   no_of_people           - adults + children
#   no_of_nights           - week nights + weekend nights
#   total_previous_booking - previous cancellations + previous non-cancelled bookings
df = df.assign(
    date=pd.to_datetime(
        {
            "year": df["arrival_year"],
            "month": df["arrival_month"],
            "day": df["arrival_date"],
        },
        errors="coerce",
    ),
    no_of_people=df["no_of_adults"] + df["no_of_children"],
    no_of_nights=df["no_of_week_nights"] + df["no_of_weekend_nights"],
    total_previous_booking=(
        df["no_of_previous_cancellations"]
        + df["no_of_previous_bookings_not_canceled"]
    ),
)

In [4]:
# Columns kept for modelling. The target ("label") is listed last; the other
# entries are the predictors.
features = [
    "no_of_people",
    "no_of_nights",
    "total_previous_booking",
    "room_type_reserved",
    "avg_price_per_room",
    "lead_time",
    "market_segment_type",
    "repeated_guest",
    "no_of_special_requests",
    "type_of_meal_plan",
    "label",
]

In [5]:
# Restrict the frame to the modelling columns listed in `features`.
# .copy() makes the selection an independent frame: the following cell assigns
# to several of its columns, and assigning into a plain column selection makes
# pandas emit SettingWithCopyWarning.
df = df[features].copy()
df.head()

Unnamed: 0,no_of_people,no_of_nights,total_previous_booking,room_type_reserved,avg_price_per_room,lead_time,market_segment_type,repeated_guest,no_of_special_requests,type_of_meal_plan,label
0,2,3,0,Room_Type 1,65.0,224,Offline,0,0,Meal Plan 1,Not_Canceled
1,2,5,0,Room_Type 1,106.68,5,Online,0,1,Not Selected,Not_Canceled
2,1,3,0,Room_Type 1,60.0,1,Online,0,0,Meal Plan 1,Canceled
3,2,2,0,Room_Type 1,100.0,211,Online,0,0,Meal Plan 1,Canceled
4,2,2,0,Room_Type 1,94.5,48,Online,0,0,Not Selected,Canceled


In [6]:
from sklearn.preprocessing import LabelEncoder

# Text-valued columns that are replaced by integer codes.
categorical_columns = ["room_type_reserved", "market_segment_type", "type_of_meal_plan"]

# Fit a separate encoder for each column and keep it. Re-fitting one shared
# encoder overwrites its learned classes, which would make it impossible to
# map the codes of the earlier columns back to their original strings.
label_encoders = {}
for column in categorical_columns:
    encoder = LabelEncoder()
    df[column] = encoder.fit_transform(df[column])
    label_encoders[column] = encoder

# Backward compatibility: `le` used to end up fitted on the last encoded
# column, so keep that name bound to the same encoder.
le = label_encoders["type_of_meal_plan"]

In [7]:
# Print "<column> - <number of distinct values>" for every column as a quick
# cardinality check.
for column in df.columns:
    distinct_values = df[column].unique()
    print(column, '-', len(distinct_values))

no_of_people - 8
no_of_nights - 25
total_previous_booking - 63
room_type_reserved - 7
avg_price_per_room - 3930
lead_time - 352
market_segment_type - 5
repeated_guest - 2
no_of_special_requests - 6
type_of_meal_plan - 4
label - 2


In [8]:
# Seed the global RNGs before the first stochastic step so that
# Restart Kernel -> Run All reproduces the same split, bootstrap samples and
# accuracy. np.random is used by `bootstrapping`; the randomness source of the
# project helpers (train_test_split, decision_tree_algorithm) is not visible
# here, so the stdlib `random` module is seeded as well.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Second argument is the test-set size passed to the project helper
# (presumably a 20% fraction -- confirm against Helper.train_test_split).
train_df, test_df = train_test_split(df, 0.2)

In [9]:
def bootstrapping(train_df, n_bootstrap):
    """Draw a bootstrap sample of rows from ``train_df``.

    Row positions are drawn with ``np.random.randint`` over
    ``[0, len(train_df))``, i.e. with replacement, so the same row may appear
    more than once in the result.

    Args:
        train_df: DataFrame to sample rows from.
        n_bootstrap: Number of rows to draw.

    Returns:
        DataFrame containing the ``n_bootstrap`` sampled rows in draw order,
        keeping their original index labels.
    """
    sampled_positions = np.random.randint(0, len(train_df), n_bootstrap)
    return train_df.iloc[sampled_positions]

def random_forest_algorithm(train_df, n_trees, n_bootstrap, n_features, dt_max_depth):
    """Train a forest of decision trees, each on its own bootstrap sample.

    Args:
        train_df: Training DataFrame handed to ``bootstrapping``.
        n_trees: Number of trees to build.
        n_bootstrap: Number of rows drawn for each tree's bootstrap sample.
        n_features: Forwarded to ``decision_tree_algorithm`` as
            ``random_subspace``.
        dt_max_depth: Forwarded to ``decision_tree_algorithm`` as
            ``max_depth``.

    Returns:
        List with ``n_trees`` entries, each the object returned by
        ``decision_tree_algorithm`` for one bootstrap sample.
    """
    # For every tree the sample is drawn first and the tree is fitted on it
    # before the next sample is drawn.
    return [
        decision_tree_algorithm(
            bootstrapping(train_df, n_bootstrap),
            max_depth=dt_max_depth,
            random_subspace=n_features,
        )
        for _ in range(n_trees)
    ]

def random_forest_predictions(test_df, forest):
    """Predict with every tree in ``forest`` and combine the votes row by row.

    Args:
        test_df: DataFrame of examples, passed to ``decision_tree_predictions``.
        forest: Sequence of trees as produced by ``random_forest_algorithm``.

    Returns:
        Series holding, for each row, the most frequent prediction across the
        trees. When several values tie, the first mode column returned by
        pandas is used.
    """
    # One column per tree, named "tree_0", "tree_1", ...
    votes = pd.DataFrame(
        {
            "tree_{}".format(tree_no): decision_tree_predictions(test_df, tree=tree)
            for tree_no, tree in enumerate(forest)
        }
    )

    # Row-wise mode; column 0 of the result is the first mode of each row.
    majority_vote = votes.mode(axis=1)[0]
    return majority_vote

In [10]:
# Train a 5-tree forest: every tree sees a bootstrap sample as large as the
# training set, with n_features=5 and a maximum depth of 10.
forest = random_forest_algorithm(
    train_df,
    n_trees=5,
    n_bootstrap=len(train_df),
    n_features=5,
    dt_max_depth=10,
)

# Score the forest's combined predictions against the held-out labels.
predictions = random_forest_predictions(test_df, forest)
accuracy = calculate_accuracy(predictions, test_df["label"])

print("Accuracy = {}".format(accuracy))

Accuracy = 0.8624396967608546
