# Exploring the Hotel-Level Data

Here, we convert all reservations into nightly sales statistics for each hotel, using the `parse_dates`, `add_res_columns`, `res_to_stats` and `calculate_stats` functions in `utils.py`.

We'll start by deriving some basic information about each hotel, including:
* **Capacity** (total number of rooms)
* **Occupancy** (rooms sold / capacity)

Then, we'll pull more statistics into the `stats` DataFrames:
* **Revenue and Rooms Sold by Customer Segment**
* **ADR by Customer Segment**

These stats will help us understand what kind of hotels we're working with.

In [13]:
import os

import numpy as np
import pandas as pd

from utils import parse_dates, add_res_columns, res_to_stats, calculate_stats

In [14]:
# Load the raw reservation tables for both hotels (H1 first, then H2).
h1_res, h2_res = (
    pd.read_csv(csv_path) for csv_path in ("../data/H1.csv", "../data/H2.csv")
)

In [15]:
# Run each hotel's reservations through `parse_dates` and then
# `add_res_columns` (both imported from utils.py).
h1_res = add_res_columns(parse_dates(h1_res))
h2_res = add_res_columns(parse_dates(h2_res))

In [16]:
# Cache the prepared reservation tables on disk.
# to_pickle does not create missing directories, so make sure the target exists.
os.makedirs("pickle", exist_ok=True)
h1_res.to_pickle("pickle/h1_res.pick")
h2_res.to_pickle("pickle/h2_res.pick")
# To reload the cached tables instead, use:
# h1_res = pd.read_pickle("pickle/h1_res.pick")
# h2_res = pd.read_pickle("pickle/h2_res.pick")

In [18]:
# Build the nightly statistics table for each hotel: `res_to_stats` on the
# reservations, followed by `calculate_stats` (both imported from utils.py).
h1_dbds = calculate_stats(res_to_stats(h1_res))
h2_dbds = calculate_stats(res_to_stats(h2_res))

In [19]:
# Cache the nightly statistics tables on disk.
# to_pickle does not create missing directories, so make sure the target exists.
os.makedirs("pickle", exist_ok=True)
h1_dbds.to_pickle("pickle/h1_dbds.pick")
h2_dbds.to_pickle("pickle/h2_dbds.pick")
# To reload the cached tables instead, use:
# h1_dbds = pd.read_pickle("pickle/h1_dbds.pick")
# h2_dbds = pd.read_pickle("pickle/h2_dbds.pick")

In [8]:
# List the columns of the H1 reservation table.
# NOTE(review): `df_h1` is not assigned in any cell shown here (the cells above
# build `h1_res`); presumably a leftover name from an earlier run — confirm.
df_h1.columns

Index(['ResNum', 'IsCanceled', 'LeadTime', 'ArrivalDateYear',
       'ArrivalDateMonth', 'ArrivalDateWeekNumber', 'ArrivalDateDayOfMonth',
       'StaysInWeekendNights', 'StaysInWeekNights', 'Adults', 'Children',
       'Babies', 'Meal', 'Country', 'MarketSegment', 'DistributionChannel',
       'IsRepeatedGuest', 'PreviousCancellations',
       'PreviousBookingsNotCanceled', 'ReservedRoomType', 'AssignedRoomType',
       'BookingChanges', 'DepositType', 'Agent', 'Company',
       'DaysInWaitingList', 'CustomerType', 'ADR', 'RequiredCarParkingSpaces',
       'TotalOfSpecialRequests', 'ReservationStatus', 'ReservationStatusDate',
       'ArrivalDate', 'LOS'],
      dtype='object')

In [9]:
# Preview the first rows of the H1 nightly statistics.
# NOTE(review): `h1_stats` is not assigned in any cell shown here (the cells
# above build `h1_dbds`); presumably an earlier name for it — confirm.
h1_stats.head()

Unnamed: 0,DOW,RoomsSold,RoomRev,Trn_RoomsSold,Trn_RoomRev,Cnt_RoomsSold,Cnt_RoomRev,TrnP_RoomsSold,TrnP_RoomRev,Grp_RoomsSold,Grp_RoomRev,ADR,Trn_ADR,TrnP_ADR,Grp_ADR,Cnt_ADR,WE,WD
2015-07-01,Wed,36.0,3356.83,30.0,2877.25,6.0,479.58,,,,,93.25,95.91,,,79.93,False,True
2015-07-02,Thu,64.0,6251.33,43.0,4631.34,19.0,1469.53,2.0,150.46,,,97.68,107.71,75.23,,77.34,False,True
2015-07-03,Fri,81.0,8052.35,54.0,5923.95,25.0,1977.94,2.0,150.46,,,99.41,109.7,75.23,,79.12,True,False
2015-07-04,Sat,108.0,10473.47,71.0,7643.08,32.0,2458.53,5.0,371.86,,,96.98,107.65,74.37,,76.83,True,False
2015-07-05,Sun,122.0,11901.72,79.0,8354.73,37.0,3022.13,5.0,371.86,1.0,153.0,97.56,105.76,74.37,153.0,81.68,False,True


In [10]:
# Summary statistics for the H1 nightly table; the `max` of RoomsSold is what
# the Capacity section below reads off as the hotel's capacity.
h1_stats.describe()

Unnamed: 0,RoomsSold,RoomRev,Trn_RoomsSold,Trn_RoomRev,Cnt_RoomsSold,Cnt_RoomRev,TrnP_RoomsSold,TrnP_RoomRev,Grp_RoomsSold,Grp_RoomRev,ADR,Trn_ADR,TrnP_ADR,Grp_ADR,Cnt_ADR
count,793.0,793.0,793.0,793.0,703.0,703.0,770.0,770.0,368.0,368.0,793.0,793.0,770.0,368.0,703.0
mean,150.535939,14532.738588,102.303909,10718.656772,19.381223,1529.545633,31.033766,2449.481221,1.975543,171.727609,89.606583,94.391463,90.703065,79.40913,71.701664
std,40.949113,9477.57353,32.915559,8122.392825,16.581485,1321.403239,28.994846,2011.421687,1.509985,197.604611,45.317853,48.123252,49.624662,52.372884,28.538983
min,29.0,1215.57,11.0,858.7,1.0,26.1,1.0,0.0,1.0,0.0,37.88,38.16,0.0,0.0,26.0
25%,131.0,7403.05,75.0,4542.66,3.0,150.1,12.0,997.75,1.0,50.0,54.58,55.62,55.31,41.65,52.15
50%,172.0,11888.98,103.0,7614.72,16.0,1322.67,20.0,1982.43,1.0,99.5,70.78,77.44,69.39,62.6,65.34
75%,180.0,19720.26,130.0,14543.53,32.0,2657.12,42.0,3559.3025,2.0,223.0,111.9,121.44,120.4325,118.18,82.81
max,187.0,37692.69,163.0,31199.21,74.0,5293.09,162.0,12497.41,11.0,1199.82,205.41,225.89,230.43,250.3,146.34


In [12]:
# Summary statistics for the H2 nightly table; the `max` of RoomsSold is what
# the Capacity section below reads off as the hotel's capacity.
# NOTE(review): `h2_stats` is not assigned in any cell shown here (the cells
# above build `h2_dbds`); presumably an earlier name for it — confirm.
h2_stats.describe()

Unnamed: 0,RoomsSold,RoomRev,Trn_RoomsSold,Trn_RoomRev,TrnP_RoomsSold,TrnP_RoomRev,Grp_RoomsSold,Grp_RoomRev,Cnt_RoomsSold,Cnt_RoomRev,ADR,Trn_ADR,TrnP_ADR,Grp_ADR,Cnt_ADR
count,793.0,793.0,793.0,793.0,761.0,761.0,306.0,306.0,343.0,343.0,793.0,793.0,761.0,306.0,343.0
mean,170.112232,18109.811488,122.211854,13553.924401,44.194481,4149.830552,2.271242,185.691176,10.664723,1160.279621,100.873493,102.975233,92.126399,83.663235,98.07895
std,59.5704,8394.756685,58.875086,7892.108697,34.695351,3621.360711,2.077583,174.598853,13.087698,1535.684718,21.46198,23.995806,20.973691,35.556704,21.481964
min,2.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,55.0,0.0,0.0,0.0,0.0,55.0
25%,131.0,11486.83,72.0,6461.47,19.0,1503.0,1.0,88.0,1.0,136.0,84.89,85.18,76.6,62.5525,81.345
50%,196.0,19303.98,134.0,14087.91,36.0,3399.43,2.0,121.2,3.0,306.0,98.36,101.64,90.36,82.44,97.97
75%,217.0,25207.42,173.0,20423.56,59.0,5471.01,2.0,235.3225,17.0,1889.37,117.22,122.79,106.27,100.0375,110.0
max,226.0,33900.91,218.0,30347.02,213.0,28626.74,12.0,1172.05,51.0,6240.76,164.23,166.5,164.7,270.0,172.8


## Capacity

**TODO (EW):** add this to `utils.py`.

We take each hotel's capacity to be the largest number of rooms it sold on any single night, i.e. the `max` row of the `RoomsSold` column in the `describe()` tables above.

**H1 (Resort Hotel)'s capacity is 187 rooms.**

**H2 (City Hotel)'s capacity is 226 rooms.**

In [None]:
# Room capacity of each hotel, taken from the maximum RoomsSold reported in the
# describe() tables (H1 = Resort Hotel, H2 = City Hotel).
H1_CAPACITY = 187
H2_CAPACITY = 226

# Occupancy = rooms sold / capacity.
h1_stats["Occ"] = h1_stats.RoomsSold.astype(float) / H1_CAPACITY
# h2_stats["Occ"] = h2_stats.RoomsSold.astype(float) / H2_CAPACITY

In [None]:
# Re-check the H1 summary statistics, which now include the Occ column.
h1_stats.describe()

In [None]:
# H2 summary statistics for comparison (no Occ column here: the H2 occupancy
# assignment is commented out).
h2_stats.describe()

In [None]:
# Peek at the first three H1 reservations.
df_h1.head(3)

In [None]:
# Sequential integer ids 0..n-1, one per row of the H1 reservation table.
# np.arange builds the array directly instead of iterating a Python range.
h1_res_nums = np.arange(len(df_h1))
# h2_res_nums = np.arange(len(df_h2))
h1_res_nums

In [None]:
# Number of H1 reservations per customer type.
df_h1.CustomerType.value_counts()

In [None]:
# Per-reservation revenue, computed as LOS * ADR.
df_h1["Revenue"] = df_h1["LOS"] * df_h1["ADR"]
# df_h2["Revenue"] = df_h2["LOS"] * df_h2["ADR"]

In [None]:
# Total LOS and Revenue per customer type, counting only reservations that
# were not canceled.
mask = df_h1.IsCanceled == 0
(
    # .loc selects rows and columns in one step (no chained indexing).
    df_h1.loc[mask, ["CustomerType", "LOS", "Revenue"]]
    .groupby("CustomerType")
    # Pass the aggregation by name: handing the builtin `sum` to agg relies on
    # deprecated aliasing and emits a FutureWarning on recent pandas.
    .agg("sum")
)

In [None]:
# Display the H1 nightly statistics table.
h1_stats

In [None]:
# Per-customer-type totals: sum of ADR (labelled Revenue) and number of
# reservations (labelled RoomsSold).
# NOTE(review): neither figure is weighted by LOS and canceled reservations are
# not filtered out here — confirm these labels are what is intended.
t = df_h1.groupby("CustomerType").agg(
    Revenue=("ADR", "sum"),
    RoomsSold=("ResNum", "count"),
)

In [None]:
# Display the per-customer-type totals.
t

In [None]:
# Look up a single cell by row label and column label.
# The summed ADR column of `t` was renamed to "Revenue", so "ADR" is no longer
# a valid column label and would raise KeyError.
t.loc["Contract", "Revenue"]

In [None]:
import datetime

# Inclusive date window.
d1 = datetime.date(2020, 1, 1)
d2 = datetime.date(2020, 4, 1)

# Every calendar day from d1 through d2 (both ends included), formatted as
# "YYYY-MM-DD" strings.
all_dates = [
    (d1 + datetime.timedelta(days=offset)).strftime("%Y-%m-%d")
    for offset in range((d2 - d1).days + 1)
]

In [None]:
# NOTE(review): `dd` is not assigned in any cell shown here — presumably scratch
# left over from an earlier session; confirm, then define or remove it.
dd

In [None]:
# Small scratch frame for trying out groupby: after transposing, column 0 holds
# the names ("Elliot" appears twice) and column 1 holds a 1 for every row.
# Note that this rebinds `t`.
t = pd.DataFrame(
    [["Elliot", "Jim", "Paco", "Elliot"], [1, 1, 1, 1]]
).T

In [None]:
# Display the scratch frame.
t

In [None]:
# Sum the remaining column per distinct value of column 0.
# Pass the aggregation by name: handing the builtin `sum` to agg relies on
# deprecated aliasing and emits a FutureWarning on recent pandas.
tg = t.groupby(0).agg("sum")

In [None]:
# Check whether "Elliot" is one of the group labels; an Index supports `in`
# directly, so no list copy is needed.
'Elliot' in tg.index