# Exploring the Hotel-Level Data

Here, we convert all reservations to hotel nightly sales statistics using the `parse_dates` and `res_to_stats` functions in `utils.py`.

We'll start by deriving some basic information about each hotel, including:
* **Capacity** (total number of rooms)
* **Occupancy** (rooms sold / capacity)

Then, we'll pull more statistics into the `stats` DataFrames:
* **Revenue and Rooms Sold by Customer Segment**
* **ADR by Customer Segment**

These stats will help us understand what kind of hotels we're working with.

In [1]:
import pandas as pd
import numpy as np

from utils import generate_hotel_dfs

pd.options.display.max_rows = 30
pd.options.display.max_columns = 60

In [2]:
h1_res, h1_dbd = generate_hotel_dfs('../data/H1.csv')

In [3]:
h2_res, h2_dbd = generate_hotel_dfs('../data/H2.csv')

In [4]:
h1_dbd.to_pickle("pickle/h1_dbd.pick")
h2_dbd.to_pickle("pickle/h2_dbd.pick")
h1_res.to_pickle("pickle/h1_res.pick")
h2_res.to_pickle("pickle/h2_res.pick")
# h1_dbds = pd.read_pickle("pickle/h1_dbds.pick")
# h2_dbds = pd.read_pickle("pickle/h2_dbds.pick")

In [5]:
h1_res

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,No-Show,Corporate,Direct,Groups,Offline TA/TO,Online TA,TA/TO,Undefined,DT_NonRefundable,DT_Refundable,AgencyBooking,CompanyListed
0,0,0,342,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0,0,0,0,0,False,False
1,1,0,737,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0,0,0,0,0,False,False
2,2,0,7,2015,July,27,1,0,1,1,0,0,BB,GBR,Direct,Direct,0,0,0,A,C,0,No Deposit,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,0,0,1,0,0,0,0,0,0,0,False,False
3,3,0,13,2015,July,27,1,0,1,1,0,0,BB,GBR,Corporate,Corporate,0,0,0,A,A,0,No Deposit,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,0,1,0,0,0,0,0,0,0,0,True,False
4,4,0,14,2015,July,27,1,0,2,2,0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,2015-07-01,2,0,1,0,0,0,0,0,0,1,1,0,0,0,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40055,40055,0,212,2017,August,35,31,2,8,2,1,0,BB,GBR,Offline TA/TO,TA/TO,0,0,0,A,A,1,No Deposit,143,,0,Transient,89.75,0,0,Check-Out,2017-09-10,2017-08-31,10,0,1,0,0,0,0,0,1,0,1,0,0,0,True,False
40056,40056,0,169,2017,August,35,30,2,9,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient-Party,202.27,0,1,Check-Out,2017-09-10,2017-08-30,11,0,0,1,0,0,1,0,0,0,0,0,0,0,True,False
40057,40057,0,204,2017,August,35,29,4,10,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient,153.57,0,3,Check-Out,2017-09-12,2017-08-29,14,0,1,0,0,0,1,0,0,0,0,0,0,0,True,False
40058,40058,0,211,2017,August,35,31,4,10,2,0,0,HB,GBR,Offline TA/TO,TA/TO,0,0,0,D,D,0,No Deposit,40,,0,Contract,112.80,0,1,Check-Out,2017-09-14,2017-08-31,14,0,0,0,0,0,0,0,1,0,1,0,0,0,True,False


In [6]:
h1_dbd.head()

Unnamed: 0,DOW,Occ,RoomsSold,ADR,RoomRev,RevPAR,Trn_RoomsSold,Trn_ADR,Trn_RoomRev,Grp_RoomsSold,Grp_ADR,Grp_RoomRev,TrnP_RoomsSold,TrnP_ADR,TrnP_RoomRev,Cnt_RoomsSold,Cnt_ADR,Cnt_RoomRev,WE,WD
2015-07-01,Wed,0.19,36.0,93.25,3356.83,17.95,30.0,95.91,2877.25,0.0,0.0,0.0,0.0,0.0,0.0,6.0,79.93,479.58,False,True
2015-07-02,Thu,0.34,64.0,97.68,6251.33,33.43,43.0,107.71,4631.34,0.0,0.0,0.0,2.0,75.23,150.46,19.0,77.34,1469.53,False,True
2015-07-03,Fri,0.43,81.0,99.41,8052.35,43.06,54.0,109.7,5923.95,0.0,0.0,0.0,2.0,75.23,150.46,25.0,79.12,1977.94,True,False
2015-07-04,Sat,0.58,108.0,96.98,10473.47,56.01,71.0,107.65,7643.08,0.0,0.0,0.0,5.0,74.37,371.86,32.0,76.83,2458.53,True,False
2015-07-05,Sun,0.65,122.0,97.56,11901.72,63.65,79.0,105.76,8354.73,1.0,153.0,153.0,5.0,74.37,371.86,37.0,81.68,3022.13,False,True


In [7]:
[col for col in h1_dbd.columns]

['DOW',
 'Occ',
 'RoomsSold',
 'ADR',
 'RoomRev',
 'RevPAR',
 'Trn_RoomsSold',
 'Trn_ADR',
 'Trn_RoomRev',
 'Grp_RoomsSold',
 'Grp_ADR',
 'Grp_RoomRev',
 'TrnP_RoomsSold',
 'TrnP_ADR',
 'TrnP_RoomRev',
 'Cnt_RoomsSold',
 'Cnt_ADR',
 'Cnt_RoomRev',
 'WE',
 'WD']

In [8]:

h1_dbd

Unnamed: 0,DOW,Occ,RoomsSold,ADR,RoomRev,RevPAR,Trn_RoomsSold,Trn_ADR,Trn_RoomRev,Grp_RoomsSold,Grp_ADR,Grp_RoomRev,TrnP_RoomsSold,TrnP_ADR,TrnP_RoomRev,Cnt_RoomsSold,Cnt_ADR,Cnt_RoomRev,WE,WD
2015-07-01,Wed,0.19,36.0,93.25,3356.83,17.95,30.0,95.91,2877.25,0.0,0.00,0.00,0.0,0.00,0.00,6.0,79.93,479.58,False,True
2015-07-02,Thu,0.34,64.0,97.68,6251.33,33.43,43.0,107.71,4631.34,0.0,0.00,0.00,2.0,75.23,150.46,19.0,77.34,1469.53,False,True
2015-07-03,Fri,0.43,81.0,99.41,8052.35,43.06,54.0,109.70,5923.95,0.0,0.00,0.00,2.0,75.23,150.46,25.0,79.12,1977.94,True,False
2015-07-04,Sat,0.58,108.0,96.98,10473.47,56.01,71.0,107.65,7643.08,0.0,0.00,0.00,5.0,74.37,371.86,32.0,76.83,2458.53,True,False
2015-07-05,Sun,0.65,122.0,97.56,11901.72,63.65,79.0,105.76,8354.73,1.0,153.00,153.00,5.0,74.37,371.86,37.0,81.68,3022.13,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-08-27,Sun,0.96,179.0,192.15,34394.38,183.93,125.0,211.64,26454.38,5.0,152.61,763.03,21.0,180.36,3787.50,28.0,121.05,3389.47,False,True
2017-08-28,Mon,0.93,174.0,190.89,33215.19,177.62,130.0,205.76,26748.82,5.0,146.21,731.03,13.0,205.13,2666.64,26.0,118.03,3068.70,False,True
2017-08-29,Tue,0.93,173.0,185.95,32169.28,172.03,130.0,198.38,25789.27,5.0,146.21,731.03,12.0,208.00,2496.04,26.0,121.27,3152.94,False,True
2017-08-30,Wed,0.93,174.0,176.95,30788.80,164.65,130.0,187.56,24383.42,7.0,135.00,945.03,12.0,210.97,2531.66,25.0,117.15,2928.69,False,True


In [9]:
h1_dbd.describe()

Unnamed: 0,Occ,RoomsSold,ADR,RoomRev,RevPAR,Trn_RoomsSold,Trn_ADR,Trn_RoomRev,Grp_RoomsSold,Grp_ADR,Grp_RoomRev,TrnP_RoomsSold,TrnP_ADR,TrnP_RoomRev,Cnt_RoomsSold,Cnt_ADR,Cnt_RoomRev
count,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0,793.0
mean,0.804968,150.535939,89.606583,14532.738588,77.71546,102.303909,94.391463,10718.656772,0.916772,36.850643,79.692005,30.13367,88.072333,2378.436999,17.181589,63.564023,1355.952812
std,0.218965,40.949113,45.317853,9477.57353,50.682171,32.915559,48.123252,8122.392825,1.424225,53.30396,159.49226,29.042111,51.216005,2024.231358,16.779235,35.211278,1335.427348
min,0.16,29.0,37.88,1215.57,6.5,11.0,38.16,858.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.7,131.0,54.58,7403.05,39.59,75.0,55.62,4542.66,0.0,0.0,0.0,11.0,54.0,868.34,2.0,44.07,80.96
50%,0.92,172.0,70.78,11888.98,63.58,103.0,77.44,7614.72,0.0,0.0,0.0,19.0,68.78,1885.26,12.0,61.39,979.33
75%,0.96,180.0,111.9,19720.26,105.46,130.0,121.44,14543.53,1.0,56.0,85.0,41.0,118.58,3480.63,30.0,80.01,2531.63
max,1.0,187.0,205.41,37692.69,201.57,163.0,225.89,31199.21,11.0,250.3,1199.82,162.0,230.43,12497.41,74.0,146.34,5293.09


In [10]:
h1_dbd.fillna(0, inplace=True)

In [11]:
h2_stats.describe()

NameError: name 'h2_stats' is not defined

## Capacity

EW- ADD TO UTILS.PY

Based on the above tables, we can see the maximum capacity of each hotel.

**H1 (Resort Hotel)'s capacity is 187 rooms.**

**H2 (City Hotel)'s capacity is 226 rooms.**

In [None]:
h1_stats["Occ"] = h1_stats.RoomsSold.astype(float) / 187
# h2_stats["Occ"] = h2_stats.RoomsSold.astype(float) / 226

In [None]:
h1_stats.describe()

In [None]:
h2_stats.describe()

In [None]:
df_h1.head(3)

In [None]:
h1_res_nums = np.array(range(len(df_h1)))
# h2_res_nums = np.array(range(len(df_h2)))
h1_res_nums

In [None]:
df_h1.CustomerType.value_counts()

In [None]:
df_h1['Revenue'] = df_h1.LOS * df_h1.ADR
# df_h2['Revenue'] = df_h2.LOS * df_h2.ADR

In [None]:
mask = df_h1.IsCanceled == 0
df_h1[mask][['CustomerType', 'LOS', 'Revenue']].groupby("CustomerType").agg(sum)

In [None]:
h1_stats

In [None]:
t = (df_h1.groupby("CustomerType")
     .agg({"ADR": "sum", "ResNum": "count"})
     .rename(columns={"ADR":"Revenue", "ResNum": "RoomsSold"}))

In [None]:
import datetime
d1 = datetime.date(2020, 1, 1)
d2 = datetime.date(2020, 4, 1)
all_dates = [datetime.datetime.strftime(d1 + datetime.timedelta(days=x), format="%Y-%m-%d") for x in range((d2-d1).days + 1)]

## Get Same Time Last Year (STLY) Date with Matching DOW

In [14]:
from dateutil.relativedelta import *
import calendar
import datetime

In [25]:
TODAY = datetime.date.today()
dayname = TODAY.strftime("%A")[:2].upper()


FR

In [36]:
# Close, but doesn't work for leap years :(
TODAY+relativedelta(day=4, weekday=2, weeks=-53)

datetime.date(2020, 3, 4)

In [29]:
print(datetime.datetime.now())

 
def lastweeknumberoflastyear():
    return datetime.date(datetime.datetime.now().year-1, 12, 28).isocalendar()[1]

print(datetime.datetime.now() - datetime.timedelta(weeks=lastweeknumberoflastyear()))

2021-03-03 22:06:50.936678
2020-02-26 22:06:50.937071


### The below cell works!!

Just need to figure out how to pass days into this dynamically....SOLVED!
* Use integer instead of MO, Monday/MO/0. 0=MO, 1=TU, and so on.

Fix tomorrow am!

In [35]:
relativedelta?

In [37]:
from datetime import *
from dateutil.relativedelta import *
NOW = datetime.now()
last_wednesday = NOW+relativedelta(years=-1, weekday=2)
last_sunday = NOW+relativedelta(years=-1, weekday=SU)
print(last_wednesday)

2020-03-04 22:16:08.288222
