In [None]:
# Import the libraries used throughout the notebook
import pandas as pd
import altair as alt

# Load the raw bookings and clean them in one non-mutating chain, so re-running
# this cell always rebuilds `df` from the CSV.
#
# - company / agent: a missing value means the booker was not a part of a company
#   or did not use an agent, so it is filled with 0.
# - children / country: rows missing either value are dropped.
#
# The fill is done on the DataFrame itself (column -> fill value mapping) instead of
# `df[col].fillna(..., inplace=True)`: that form mutates a column selection, which
# pandas warns about and which no longer updates `df` under copy-on-write.
df = (
    pd.read_csv("hotel_bookings.csv")
    .fillna({'company': 0, 'agent': 0})
    .dropna(subset=['children', 'country'])
)

In [None]:
import geopandas
import folium
!pip install mapclassify

# Country polygons read from the ne_10m_admin_0_countries shapefile.
countries = geopandas.read_file("ne_10m_admin_0_countries/ne_10m_admin_0_countries.shp")


def _share_of(label):
    """Return an aggregator computing the fraction of a group's values equal to `label`."""
    return lambda values: (values == label).sum() / values.size


# One row per country. Each keyword is an output column described as
# (source column, aggregation). The "percent_*" columns are fractions in [0, 1].
bookings_by_country = df.groupby(["country"])
grouped = bookings_by_country.agg(
    # volume and averages
    total_reservations=("hotel", "count"),
    avg_adults=("adults", "mean"),
    avg_lead_time=("lead_time", "mean"),
    avg_special_requests=("total_of_special_requests", "mean"),
    avg_adr=("adr", "mean"),
    # hotel type
    percent_resort=("hotel", _share_of("Resort Hotel")),
    percent_city=("hotel", _share_of("City Hotel")),
    # deposit type
    percent_no_deposit=("deposit_type", _share_of("No Deposit")),
    percent_non_refund=("deposit_type", _share_of("Non Refund")),
    percent_refundable=("deposit_type", _share_of("Refundable")),
    # market segment
    percent_online_ta=("market_segment", _share_of("Online TA")),
    percent_offline_ta_to=("market_segment", _share_of("Offline TA/TO")),
    percent_groups=("market_segment", _share_of("Groups")),
    percent_direct=("market_segment", _share_of("Direct")),
)

# Attach the per-country booking statistics to the country polygons. The left join
# keeps every polygon; countries without bookings get NaN statistics.
#
# Join on ADM0_A3, not SOV_A3. SOV_A3 is Natural Earth's *sovereign* code, written as
# "US1", "GB1", "FR1", ... for sovereigns with dependencies, so those countries never
# matched the three-letter codes in the bookings data (assumed ISO 3166-1 alpha-3 --
# confirm against the dataset), and dependencies shared their sovereign's code.
# ADM0_A3 identifies each polygon's own country.
full = countries.merge(grouped, how="left", left_on="ADM0_A3", right_on="country")

# Interactive choropleth coloured by average daily rate, with the remaining
# per-country statistics shown on hover.
average_columns = ["avg_adults", "avg_lead_time", "avg_special_requests", "avg_adr"]
hotel_type_columns = ["percent_resort", "percent_city"]
deposit_type_columns = ["percent_no_deposit", "percent_non_refund", "percent_refundable"]
market_segment_columns = ["percent_online_ta", "percent_offline_ta_to", "percent_groups", "percent_direct"]

tooltip_columns = [
    "total_reservations",
    *average_columns,
    *hotel_type_columns,
    *deposit_type_columns,
    *market_segment_columns,
]

# NOTE(review): `map` shadows the Python builtin for the rest of the notebook; the name
# is kept because other cells may refer to it.
map = full.explore(column="avg_adr", legend=True, tooltip=tooltip_columns)

map

In [None]:
# Let Altair embed the full DataFrame instead of raising on large row counts.
alt.data_transformers.disable_max_rows()


def _dropdown_selection(field, choices, name):
    """Build a point selection on `field` bound to a dropdown.

    The dropdown offers an 'All' entry (option value None) followed by one entry per
    value in `choices`, labelled with the value's string form.
    """
    dropdown = alt.binding_select(
        options=[None] + list(choices),
        labels=['All'] + [str(choice) for choice in choices],
        name=name,
    )
    return alt.selection_point(fields=[field], bind=dropdown)


# NOTE(review): `selection` (arrival year) is created here, but the scatter chart in this
# cell only attaches `selection1` and `selection2`.
selection = _dropdown_selection('arrival_date_year', [2015, 2016, 2017], 'Arrival Year: ')
selection1 = _dropdown_selection('reserved_room_type', list('ABCDEFGH'), 'Room Type: ')
selection2 = _dropdown_selection('hotel', ['City Hotel', 'Resort Hotel'], 'Hotel Type: ')

# Scatter of average daily rate against lead time, coloured by hotel and filtered
# by the room-type and hotel-type dropdowns.
adr_axis = alt.X('adr:Q', axis=alt.Axis(title='Average Daily Rate (Dollars)'))
lead_time_axis = alt.Y('lead_time:Q', axis=alt.Axis(title='Lead Time (Days Until Arrival When Booked)'))
hotel_color = alt.Color('hotel:N', legend=alt.Legend(title='Hotel'))

# A point is shown only when it satisfies both dropdown selections.
active_filter = selection2 & selection1

chart = (
    alt.Chart(df)
    .mark_circle()
    .encode(x=adr_axis, y=lead_time_axis, color=hotel_color)
    .add_params(selection1, selection2)
    .transform_filter(active_filter)
    .properties(title='Average Daily Rate vs Lead Time for Hotel Bookings by Hotel and Room Type')
)

chart

In [None]:
# Let Altair embed the full DataFrame instead of raising on large row counts.
alt.data_transformers.disable_max_rows()

# One colour per arrival year.
colors = ['#cbc9e2', '#9e9ac8', '#6a51a3']

# Shared data, title, encodings and size: mean ADR per arrival week, one series per year.
weekly_adr = alt.Chart(
    df,
    title="Average Daily Cost of Hotel Booking for Each Week through the Years 2015-2017",
).encode(
    alt.X('arrival_date_week_number:O', title = 'Arrival Date Week Number'),
    alt.Y('mean(adr):Q', title = 'Average Daily Cost of Hotel Booking for a Week'),
    alt.Color('arrival_date_year:N', title = 'Arrival Date Year').scale(range=colors),
).properties(width=1000, height=400)

# The same chart drawn twice: as connecting lines and as point markers.
line = weekly_adr.mark_line()
point = weekly_adr.mark_circle()

line + point

In [None]:
# Flag bookings made through a travel agent: 1 when `agent` is greater than 0, else 0.
# A vectorised comparison replaces the row-by-row `apply`, giving the same 0/1 integers.
df['travel_agent_used'] = df['agent'].gt(0).astype('int64')

# Count bookings for every (travel_agent_used, is_canceled) combination.
cancellations_by_agent = (
    df.groupby(['travel_agent_used', 'is_canceled'])
    .size()
    .reset_index(name='count')
)

# Relabel both 0/1 flags as 'No'/'Yes' so the chart shows readable categories.
yes_no = {0: 'No', 1: 'Yes'}
cancellations_by_agent['is_canceled'] = cancellations_by_agent['is_canceled'].replace(yes_no)
cancellations_by_agent['travel_agent_used'] = cancellations_by_agent['travel_agent_used'].replace(yes_no)

# Bar chart of booking counts: one panel per cancellation outcome, one bar per
# travel-agent flag. The x axis labels, ticks and title are hidden.
agent_axis = alt.X('travel_agent_used:N', axis=alt.Axis(labels=False, ticks=False, title=''))
count_axis = alt.Y('count:Q', title='Count')
agent_color = alt.Color('travel_agent_used:N')
canceled_panels = alt.Column('is_canceled:N', title='Canceled')

plot_title = alt.TitleParams(
    'Cancellations by Travel Agent Used',
    anchor='middle',
    baseline='bottom',
)

bar_plot = (
    alt.Chart(cancellations_by_agent)
    .mark_bar()
    .encode(x=agent_axis, y=count_axis, color=agent_color, column=canceled_panels)
    .properties(title=plot_title)
)

bar_plot