# Preparing our Dataset to Model Demand

In [1]:
import os
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.preprocessing import OneHotEncoder as ohe

from dbds import generate_hotel_dfs
from agg import prep_demand_features
from agg_utils import stly_cols_agg, ly_cols_agg, drop_cols_agg, stly_pace_cols, ty_pace_cols

pd.options.display.max_rows = 150
pd.options.display.max_columns = 250
pd.options.display.max_colwidth = None

DATE_FMT = "%Y-%m-%d"
h1_capacity = 187
h2_capacity = 226
AOD = "2017-08-01"
AOD_dt = pd.to_datetime(AOD)

h1_res = pd.read_pickle("pickle/h1_res.pick")
h2_res = pd.read_pickle("pickle/h2_res.pick")
h1_dbd = pd.read_pickle("pickle/h1_dbd.pick")
h2_dbd = pd.read_pickle("pickle/h2_dbd.pick")

In [2]:
# h1_res, h1_dbd = generate_hotel_dfs("../data/H1.csv", capacity=h1_capacity)
# h2_res, h2_dbd = generate_hotel_dfs("../data/H2.csv", capacity=h2_capacity)

# h1_res.to_pickle("pickle/h1_res.pick")
# h1_dbd.to_pickle("pickle/h1_dbd.pick")
# h2_res.to_pickle("pickle/h2_res.pick")
# h2_dbd.to_pickle("pickle/h2_dbd.pick")

## Combine Files Generated by save_sims.py

In [4]:
# generate list of relevant files
import datetime as dt
hotel_num = 1
h = 'h' + str(hotel_num)
SIM_AOD = pd.to_datetime(dt.date(2017, 8, 1), format=DATE_FMT)
sim_start = SIM_AOD - pd.DateOffset(365*2) # need > 364 days of actuals for each date, the rest future-looking

FOLDER = "./sims/pickle/"
lam_include = lambda x: x[:2] == h and pd.to_datetime(x[7:17]) >= sim_start
h1_files = [f for f in os.listdir(FOLDER) if lam_include(f)]
h1_files.sort()
len(h1_files), h1_files[0], h1_files[-1] # note STLY date of 8/1/17 == 8/2/16 (matching weekday)

(761, 'h1_sim_2015-08-02.pick', 'h1_sim_2017-08-31.pick')

In [5]:
%%time
df_sim = pd.DataFrame()
for otb_data in h1_files:
    df_sim = df_sim.append(pd.read_pickle(FOLDER + otb_data))

df_sim.shape

CPU times: user 7.97 s, sys: 129 ms, total: 8.1 s
Wall time: 9.16 s


(23856, 54)

## Adding calculated features

In [6]:
# Add AsOfDate

def apply_aod(row):
    stay_date = pd.to_datetime(row["Date"])
    stly_stay_date = pd.to_datetime(row["STLY_Date"])
    n_days_b4 = int(row["DaysUntilArrival"])
    as_of_date = pd.to_datetime(stay_date - pd.DateOffset(n_days_b4), format=DATE_FMT)
    stly_as_of_date = pd.to_datetime(stly_stay_date - pd.DateOffset(n_days_b4), format=DATE_FMT)
    return as_of_date, stly_as_of_date

df_sim[["AsOfDate","STLY_AsOfDate"]] = df_sim[["Date", "STLY_Date", "DaysUntilArrival"]].apply(apply_aod, axis=1, result_type='expand')
df_sim.rename(columns={"Date": "StayDate", "STLY_Date": "STLY_StayDate"}, inplace=True)

df_sim.head()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate
2015-08-02,Sun,236.0,34425.02,22.0,187.0,28680.27,21.0,19.0,2432.02,1.0,0.0,0.0,0.0,30.0,3312.73,0.0,2015-08-02,2014-08-03,0,68.0,153.37,161.0,22662.86,119.0,17749.28,15.0,1909.3,0.0,0.0,27.0,3004.28,161.0,22923.55,120.0,18116.97,15.0,1909.3,0.0,0.0,26.0,2897.28,162.0,23255.24,123.0,18769.64,14.0,1710.3,0.0,0.0,25.0,2775.3,,,,2015-08-02,2014-08-03
2015-08-03,Mon,232.0,34800.06,26.0,175.0,28062.25,25.0,20.0,2536.0,1.0,0.0,0.0,0.0,37.0,4201.81,0.0,2015-08-03,2014-08-04,1,54.0,160.36,172.0,24998.88,124.0,19419.79,16.0,2013.28,0.0,0.0,32.0,3565.81,168.0,24572.57,121.0,19116.48,16.0,2013.28,0.0,0.0,31.0,3442.81,173.0,25712.31,128.0,20577.2,15.0,1814.28,0.0,0.0,30.0,3320.83,,,,2015-08-02,2014-08-03
2015-08-04,Tue,240.0,36591.77,26.0,180.0,29365.14,25.0,21.0,2846.48,1.0,0.0,0.0,0.0,39.0,4380.15,0.0,2015-08-04,2014-08-05,2,57.0,163.14,177.0,25901.61,126.0,20058.58,17.0,2098.88,0.0,0.0,34.0,3744.15,172.0,25296.7,122.0,19576.67,17.0,2098.88,0.0,0.0,33.0,3621.15,176.0,26399.13,126.0,20539.08,18.0,2360.88,0.0,0.0,32.0,3499.17,,,,2015-08-02,2014-08-03
2015-08-05,Wed,241.0,37252.33,28.0,187.0,30654.82,27.0,19.0,2621.76,1.0,0.0,0.0,0.0,35.0,3975.75,0.0,2015-08-05,2014-08-06,3,57.0,163.93,174.0,25803.81,128.0,20474.4,15.0,1874.16,0.0,0.0,31.0,3455.25,174.0,26247.68,128.0,20820.72,15.0,1874.16,0.0,0.0,31.0,3552.8,174.0,26349.33,129.0,21002.9,16.0,2136.16,0.0,0.0,29.0,3210.27,,,,2015-08-02,2014-08-03
2015-08-06,Thu,237.0,36957.08,30.0,185.0,30518.72,27.0,22.0,2992.26,3.0,0.0,0.0,0.0,30.0,3446.1,0.0,2015-08-06,2014-08-07,4,56.0,164.97,169.0,25175.89,125.0,20005.63,18.0,2244.66,0.0,0.0,26.0,2925.6,174.0,26695.28,129.0,21088.45,20.0,2705.66,0.0,0.0,25.0,2901.17,176.0,27065.32,133.0,21878.04,19.0,2506.66,0.0,0.0,24.0,2680.62,,,,2015-08-02,2014-08-03


In [7]:
df_sim.shape

(23856, 56)

In [8]:
# add remaining supply ('RemSupply')
capacity = 187
df_sim["RemSupply"] = (
    capacity - df_sim.RoomsOTB.astype(int) + df_sim.CxlForecast.astype(int)
)

In [9]:
df_sim.shape

(23856, 57)

In [10]:
# add one-hot-encoded DOW ('Day of Week') columns

ohe_dow = pd.get_dummies(df_sim.DOW, drop_first=True)
dow_ohe_cols = list(ohe_dow.columns)
df_sim[dow_ohe_cols] = ohe_dow

In [11]:
df_sim.shape

(23856, 63)

In [12]:
# add NONTRN cols

df_sim["NONTRN_RoomsOTB"] = (
    df_sim.RoomsOTB - df_sim.TRN_RoomsOTB
)
df_sim["NONTRN_RevOTB"] = df_sim.RevOTB - df_sim.TRN_RevOTB
df_sim["NONTRN_ADR_OTB"] = round(df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"], 2)
df_sim["NONTRN_CxlForecast"] = df_sim.CxlForecast - df_sim.TRN_CxlForecast

# df_sim["LYA_NONTRN_RoomsOTB"] = (
#     df_sim.LYA_TRNP_RoomsOTB + df_sim.LYA_GRP_RoomsOTB + df_sim.LYA_CNT_RoomsOTB
# )
# df_sim["LYA_NONTRN_RevOTB"] = df_sim.LYA_TRNP_RevOTB + df_sim.LYA_GRP_RevOTB + df_sim.LYA_CNT_RevOTB



In [13]:
df_sim.shape

(23856, 67)

In [14]:
len(ly_cols_agg)

7

In [15]:
tuple(np.zeros(7))

(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)

In [16]:
# Add last-year actual columns ("LYA_")

def apply_ly_cols(row):
    stly_date = row["STLY_StayDate"]
    if pd.to_datetime(stly_date) < dt.date(2015, 8, 1):
        return tuple(np.zeros(len(ly_cols_agg)))
    stly_date_str = dt.datetime.strftime(stly_date, format=DATE_FMT)

    df_lya = list(h1_dbd.loc[stly_date_str, ly_cols_agg])
    return tuple(df_lya)

ly_new_cols = ["LYA_" + col for col in ly_cols_agg]
df_sim[ly_new_cols] = df_sim[["STLY_StayDate"]].apply(apply_ly_cols, axis=1, result_type="expand")

df_sim.fillna(0, inplace=True)

df_sim.tail()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev
2017-08-30,Wed,173.0,30619.25,17.0,128.0,24078.87,16.0,12.0,2531.66,0.0,7.0,945.03,0.0,26.0,3063.69,1.0,2017-08-30,2016-08-31,1,0.0,201.06,159.0,28294.17,118.0,22246.79,11.0,2394.66,5.0,724.03,25.0,2928.69,164.0,28930.08,119.0,22389.7,12.0,2531.66,7.0,945.03,26.0,3063.69,173.0,30708.25,128.0,24167.87,12.0,2531.66,7.0,945.03,26.0,3063.69,False,False,176.99,2017-08-29,2016-08-30,31,0,0,0,0,0,1,45.0,6540.38,145.34,1.0,171.0,137.88,23577.22,76.0,126.0,151.21,19052.76
2017-08-31,Thu,170.0,29249.15,17.0,127.0,23391.07,16.0,12.0,2227.16,0.0,6.0,888.73,0.0,25.0,2742.19,1.0,2017-08-31,2016-09-01,2,0.0,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,False,False,172.05,2017-08-29,2016-08-30,34,0,0,0,1,0,0,43.0,5858.08,136.23,1.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66
2017-08-30,Wed,174.0,30788.8,26.0,130.0,24383.42,26.0,12.0,2531.66,0.0,7.0,945.03,0.0,25.0,2928.69,0.0,2017-08-30,2016-08-31,0,0.0,201.06,159.0,28294.17,118.0,22246.79,11.0,2394.66,5.0,724.03,25.0,2928.69,164.0,28930.08,119.0,22389.7,12.0,2531.66,7.0,945.03,26.0,3063.69,173.0,30708.25,128.0,24167.87,12.0,2531.66,7.0,945.03,26.0,3063.69,False,False,176.95,2017-08-30,2016-08-31,39,0,0,0,0,0,1,44.0,6405.38,145.58,0.0,171.0,137.88,23577.22,76.0,126.0,151.21,19052.76
2017-08-31,Thu,171.0,29217.7,29.0,129.0,23494.62,29.0,12.0,2227.16,0.0,6.0,888.73,0.0,24.0,2607.19,0.0,2017-08-31,2016-09-01,1,0.0,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,False,False,170.86,2017-08-30,2016-08-31,45,0,0,0,1,0,0,42.0,5723.08,136.26,0.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66
2017-08-31,Thu,172.0,29461.7,16.0,130.0,23738.62,16.0,12.0,2227.16,0.0,6.0,888.73,0.0,24.0,2607.19,0.0,2017-08-31,2016-09-01,0,0.0,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,False,False,171.29,2017-08-31,2016-09-01,31,0,0,0,1,0,0,42.0,5723.08,136.26,0.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66


In [17]:
actual_cols = ['RoomsSold', "ADR", "RoomRev", "NumCancels"]
def apply_ty_actuals(row):
    date = row["StayDate"]
    date_str = dt.datetime.strftime(date, format=DATE_FMT)
    results = list(h1_dbd.loc[date_str, actual_cols])
    return tuple(results)

new_actual_cols = ["ACTUAL_" + col for col in actual_cols]
df_sim[new_actual_cols] = df_sim[["StayDate"]].apply(apply_ty_actuals, axis=1, result_type="expand")

df_sim.fillna(0, inplace=True)

df_sim.tail()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels
2017-08-30,Wed,173.0,30619.25,17.0,128.0,24078.87,16.0,12.0,2531.66,0.0,7.0,945.03,0.0,26.0,3063.69,1.0,2017-08-30,2016-08-31,1,0.0,201.06,159.0,28294.17,118.0,22246.79,11.0,2394.66,5.0,724.03,25.0,2928.69,164.0,28930.08,119.0,22389.7,12.0,2531.66,7.0,945.03,26.0,3063.69,173.0,30708.25,128.0,24167.87,12.0,2531.66,7.0,945.03,26.0,3063.69,False,False,176.99,2017-08-29,2016-08-30,31,0,0,0,0,0,1,45.0,6540.38,145.34,1.0,171.0,137.88,23577.22,76.0,126.0,151.21,19052.76,174.0,176.95,30788.8,109.0
2017-08-31,Thu,170.0,29249.15,17.0,127.0,23391.07,16.0,12.0,2227.16,0.0,6.0,888.73,0.0,25.0,2742.19,1.0,2017-08-31,2016-09-01,2,0.0,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,False,False,172.05,2017-08-29,2016-08-30,34,0,0,0,1,0,0,43.0,5858.08,136.23,1.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66,170.0,171.07,29082.2,101.0
2017-08-30,Wed,174.0,30788.8,26.0,130.0,24383.42,26.0,12.0,2531.66,0.0,7.0,945.03,0.0,25.0,2928.69,0.0,2017-08-30,2016-08-31,0,0.0,201.06,159.0,28294.17,118.0,22246.79,11.0,2394.66,5.0,724.03,25.0,2928.69,164.0,28930.08,119.0,22389.7,12.0,2531.66,7.0,945.03,26.0,3063.69,173.0,30708.25,128.0,24167.87,12.0,2531.66,7.0,945.03,26.0,3063.69,False,False,176.95,2017-08-30,2016-08-31,39,0,0,0,0,0,1,44.0,6405.38,145.58,0.0,171.0,137.88,23577.22,76.0,126.0,151.21,19052.76,174.0,176.95,30788.8,109.0
2017-08-31,Thu,171.0,29217.7,29.0,129.0,23494.62,29.0,12.0,2227.16,0.0,6.0,888.73,0.0,24.0,2607.19,0.0,2017-08-31,2016-09-01,1,0.0,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,False,False,170.86,2017-08-30,2016-08-31,45,0,0,0,1,0,0,42.0,5723.08,136.26,0.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66,170.0,171.07,29082.2,101.0
2017-08-31,Thu,172.0,29461.7,16.0,130.0,23738.62,16.0,12.0,2227.16,0.0,6.0,888.73,0.0,24.0,2607.19,0.0,2017-08-31,2016-09-01,0,0.0,197.0,148.0,25598.71,116.0,21552.53,5.0,908.66,4.0,598.73,23.0,2538.79,158.0,26730.7,117.0,21246.62,10.0,1853.16,6.0,888.73,25.0,2742.19,164.0,27966.82,123.0,22482.74,10.0,1853.16,6.0,888.73,25.0,2742.19,False,False,171.29,2017-08-31,2016-09-01,31,0,0,0,1,0,0,42.0,5723.08,136.26,0.0,183.0,133.07,24351.32,92.0,132.0,145.19,19164.66,170.0,171.07,29082.2,101.0


In [18]:
mask = df_sim.StayDate == '2017-08-09'
df_sim[mask][["ACTUAL_RoomsSold"]]

Unnamed: 0,ACTUAL_RoomsSold
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0
2017-08-09,183.0


In [19]:
h1_dbd.loc["2017-08-09"]

DOW                                 Wed
Occ                                0.98
RoomsSold                         183.0
ADR                              201.27
RoomRev                        36832.94
RevPAR                           196.97
NumCancels                        132.0
TRN_RoomsSold                     125.0
TRN_ADR                          218.29
TRN_RoomRev                    27285.73
GRP_RoomsSold                       7.0
GRP_ADR                           127.0
GRP_RoomRev                      889.02
TRNP_RoomsSold                     26.0
TRNP_ADR                         194.72
TRNP_RoomRev                    5062.63
CNT_RoomsSold                      25.0
CNT_ADR                          143.82
CNT_RoomRev                     3595.56
WE                                False
WD                                 True
STLY_Date           2016-08-10 00:00:00
NONTRN_RoomsSold                   58.0
NONTRN_RoomRev                  9547.21
NONTRN_ADR                       164.61


In [20]:
h1_dbd.columns

Index(['DOW', 'Occ', 'RoomsSold', 'ADR', 'RoomRev', 'RevPAR', 'NumCancels',
       'TRN_RoomsSold', 'TRN_ADR', 'TRN_RoomRev', 'GRP_RoomsSold', 'GRP_ADR',
       'GRP_RoomRev', 'TRNP_RoomsSold', 'TRNP_ADR', 'TRNP_RoomRev',
       'CNT_RoomsSold', 'CNT_ADR', 'CNT_RoomRev', 'WE', 'WD', 'STLY_Date',
       'NONTRN_RoomsSold', 'NONTRN_RoomRev', 'NONTRN_ADR'],
      dtype='object')

In [21]:
df_sim.shape

(23856, 78)

In [22]:
# get recent pickup (tminus) columns


# first need TRN_ADR

df_sim["TRN_ADR_OTB"] = round(df_sim["TRN_RevOTB"] / df_sim["TRN_RoomsOTB"])

# loop thru tminus windows (for total hotel & trn) & count bookings
tms = ["TM30_", "TM15_", "TM05_"]
segs = ["", "TRN_"] # "" for total hotel

for tm in tms:
    for seg in segs:  
        # add tm_seg_adr
        df_sim[tm + seg + "ADR_OTB"] = round(df_sim[tm + seg + "RevOTB"] / df_sim[tm + seg + "RoomsOTB"], 2)
        # and now segmented stats
        df_sim[tm + seg + "RoomsPickup"] = round(
            df_sim[seg + "RoomsOTB"] - df_sim[tm + seg + "RoomsOTB"], 2
        )
        df_sim[tm + seg + "RevPickup"] = round(
            df_sim[seg + "RevOTB"] - df_sim[tm + seg + "RevOTB"], 2
        )
        df_sim[tm + seg + "ADR_Pickup"] = round(
            df_sim[seg + "ADR_OTB"] - df_sim[tm + seg + "ADR_OTB"], 2
        )
    # back to outside loop (iterating thru tms)
    # add TM_NONTRN_OTB
    df_sim[tm +  "NONTRN_RoomsOTB"] = (
        df_sim[tm + "RoomsOTB"]
        - df_sim[tm + "TRN_RoomsOTB"]
    )
    df_sim[tm + "NONTRN_RevOTB"] = (
        df_sim[tm + "RevOTB"]
        - df_sim[tm + "TRN_RevOTB"]
    )
    df_sim[tm + "NONTRN_ADR_OTB"] = round(
        df_sim[tm + "NONTRN_RevOTB"] / df_sim[tm + "NONTRN_RoomsOTB"], 2
    )
    # add TM_NONTRN_OTB Pickup
    df_sim[tm +  "NONTRN_RoomsPickup"] = (
        df_sim["NONTRN_RoomsOTB"]
        - df_sim[tm + "NONTRN_RoomsOTB"]
    )
    df_sim[tm + "NONTRN_RevPickup"] = (
        df_sim["NONTRN_RevOTB"]
        - df_sim[tm + "NONTRN_RevOTB"]
    )
    df_sim[tm + "NONTRN_ADR_Pickup"] = (
        df_sim["NONTRN_ADR_OTB"]
        - df_sim[tm + "NONTRN_ADR_OTB"]
    )

df_sim.head()

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,TRNP_RoomsOTB,TRNP_RevOTB,TRNP_CxlForecast,GRP_RoomsOTB,GRP_RevOTB,GRP_CxlForecast,CNT_RoomsOTB,CNT_RevOTB,CNT_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,TM30_RoomsOTB,TM30_RevOTB,TM30_TRN_RoomsOTB,TM30_TRN_RevOTB,TM30_TRNP_RoomsOTB,TM30_TRNP_RevOTB,TM30_GRP_RoomsOTB,TM30_GRP_RevOTB,TM30_CNT_RoomsOTB,TM30_CNT_RevOTB,TM15_RoomsOTB,TM15_RevOTB,TM15_TRN_RoomsOTB,TM15_TRN_RevOTB,TM15_TRNP_RoomsOTB,TM15_TRNP_RevOTB,TM15_GRP_RoomsOTB,TM15_GRP_RevOTB,TM15_CNT_RoomsOTB,TM15_CNT_RevOTB,TM05_RoomsOTB,TM05_RevOTB,TM05_TRN_RoomsOTB,TM05_TRN_RevOTB,TM05_TRNP_RoomsOTB,TM05_TRNP_RevOTB,TM05_GRP_RoomsOTB,TM05_GRP_RevOTB,TM05_CNT_RoomsOTB,TM05_CNT_RevOTB,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,LYA_RoomsSold,LYA_ADR,LYA_RoomRev,LYA_NumCancels,LYA_TRN_RoomsSold,LYA_TRN_ADR,LYA_TRN_RoomRev,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,TRN_ADR_OTB,TM30_ADR_OTB,TM30_RoomsPickup,TM30_RevPickup,TM30_ADR_Pickup,TM30_TRN_ADR_OTB,TM30_TRN_RoomsPickup,TM30_TRN_RevPickup,TM30_TRN_ADR_Pickup,TM30_NONTRN_RoomsOTB,TM30_NONTRN_RevOTB,TM30_NONTRN_ADR_OTB,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_Pickup,TM15_ADR_OTB,TM15_RoomsPickup,TM15_RevPickup,TM15_ADR_Pickup,TM15_TRN_ADR_OTB,TM15_TRN_RoomsPickup,TM15_TRN_RevPickup,TM15_TRN_ADR_Pickup,TM15_NONTRN_RoomsOTB,TM15_NONTRN_RevOTB,TM15_NONTRN_ADR_OTB,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_Pickup,TM05_ADR_OTB,TM05_RoomsPickup,TM05_RevPickup,TM05_ADR_Pickup,TM05_TRN_ADR_OTB,TM05_TRN_RoomsPickup,TM05_TRN_RevPickup,TM05_TRN_ADR_Pickup,TM05_NONTRN_RoomsOTB,TM05_NONTRN_RevOTB,TM05_NONTRN_ADR_OTB,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_Pickup
2015-08-02,Sun,236.0,34425.02,22.0,187.0,28680.27,21.0,19.0,2432.02,1.0,0.0,0.0,0.0,30.0,3312.73,0.0,2015-08-02,2014-08-03,0,68.0,153.37,161.0,22662.86,119.0,17749.28,15.0,1909.3,0.0,0.0,27.0,3004.28,161.0,22923.55,120.0,18116.97,15.0,1909.3,0.0,0.0,26.0,2897.28,162.0,23255.24,123.0,18769.64,14.0,1710.3,0.0,0.0,25.0,2775.3,0,0,0.0,2015-08-02,2014-08-03,-27,0,0,1,0,0,0,49.0,5744.75,117.24,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,168.0,144.92,24346.11,68.0,153.0,140.76,75.0,11762.16,-140.76,149.15,68.0,10930.99,3.85,42.0,4913.58,116.99,7.0,831.17,0.25,142.38,75.0,11501.47,-142.38,150.97,67.0,10563.3,2.03,41.0,4806.58,117.23,8.0,938.17,0.01,143.55,74.0,11169.78,-143.55,152.6,64.0,9910.63,0.4,39.0,4485.6,115.02,10.0,1259.15,2.22
2015-08-03,Mon,232.0,34800.06,26.0,175.0,28062.25,25.0,20.0,2536.0,1.0,0.0,0.0,0.0,37.0,4201.81,0.0,2015-08-03,2014-08-04,1,54.0,160.36,172.0,24998.88,124.0,19419.79,16.0,2013.28,0.0,0.0,32.0,3565.81,168.0,24572.57,121.0,19116.48,16.0,2013.28,0.0,0.0,31.0,3442.81,173.0,25712.31,128.0,20577.2,15.0,1814.28,0.0,0.0,30.0,3320.83,0,0,0.0,2015-08-02,2014-08-03,-19,1,0,0,0,0,0,57.0,6737.81,118.21,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,178.0,149.32,26579.6,54.0,160.0,145.34,60.0,9801.18,-145.34,156.61,51.0,8642.46,3.39,48.0,5579.09,116.23,9.0,1158.72,1.98,146.27,64.0,10227.49,-146.27,157.99,54.0,8945.77,2.01,47.0,5456.09,116.09,10.0,1281.72,2.12,148.63,59.0,9087.75,-148.63,160.76,47.0,7485.05,-0.76,45.0,5135.11,114.11,12.0,1602.7,4.1
2015-08-04,Tue,240.0,36591.77,26.0,180.0,29365.14,25.0,21.0,2846.48,1.0,0.0,0.0,0.0,39.0,4380.15,0.0,2015-08-04,2014-08-05,2,57.0,163.14,177.0,25901.61,126.0,20058.58,17.0,2098.88,0.0,0.0,34.0,3744.15,172.0,25296.7,122.0,19576.67,17.0,2098.88,0.0,0.0,33.0,3621.15,176.0,26399.13,126.0,20539.08,18.0,2360.88,0.0,0.0,32.0,3499.17,0,0,0.0,2015-08-02,2014-08-03,-27,0,0,0,0,1,0,60.0,7226.63,120.44,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182.0,151.57,27585.83,58.0,163.0,146.34,63.0,10690.16,-146.34,159.2,54.0,9306.56,3.8,51.0,5843.03,114.57,9.0,1383.6,5.87,147.07,68.0,11295.07,-147.07,160.46,58.0,9788.47,2.54,50.0,5720.03,114.4,10.0,1506.6,6.04,150.0,64.0,10192.64,-150.0,163.01,54.0,8826.06,-0.01,50.0,5860.05,117.2,10.0,1366.58,3.24
2015-08-05,Wed,241.0,37252.33,28.0,187.0,30654.82,27.0,19.0,2621.76,1.0,0.0,0.0,0.0,35.0,3975.75,0.0,2015-08-05,2014-08-06,3,57.0,163.93,174.0,25803.81,128.0,20474.4,15.0,1874.16,0.0,0.0,31.0,3455.25,174.0,26247.68,128.0,20820.72,15.0,1874.16,0.0,0.0,31.0,3552.8,174.0,26349.33,129.0,21002.9,16.0,2136.16,0.0,0.0,29.0,3210.27,0,0,0.0,2015-08-02,2014-08-03,-26,0,0,0,0,0,1,54.0,6597.51,122.18,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,182.0,152.92,27831.73,59.0,164.0,148.3,67.0,11448.52,-148.3,159.96,59.0,10180.42,4.04,46.0,5329.41,115.86,8.0,1268.1,6.32,150.85,67.0,11004.65,-150.85,162.66,59.0,9834.1,1.34,46.0,5426.96,117.98,8.0,1170.55,4.2,151.43,67.0,10903.0,-151.43,162.81,58.0,9651.92,1.19,45.0,5346.43,118.81,9.0,1251.08,3.37
2015-08-06,Thu,237.0,36957.08,30.0,185.0,30518.72,27.0,22.0,2992.26,3.0,0.0,0.0,0.0,30.0,3446.1,0.0,2015-08-06,2014-08-07,4,56.0,164.97,169.0,25175.89,125.0,20005.63,18.0,2244.66,0.0,0.0,26.0,2925.6,174.0,26695.28,129.0,21088.45,20.0,2705.66,0.0,0.0,25.0,2901.17,176.0,27065.32,133.0,21878.04,19.0,2506.66,0.0,0.0,24.0,2680.62,0,0,0.0,2015-08-02,2014-08-03,-20,0,0,0,1,0,0,52.0,6438.36,123.81,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,180.0,155.27,27947.92,57.0,165.0,148.97,68.0,11781.19,-148.97,160.05,60.0,10513.09,4.95,44.0,5170.26,117.51,8.0,1268.1,6.3,153.42,63.0,10261.8,-153.42,163.48,56.0,9430.27,1.52,45.0,5606.83,124.6,7.0,831.53,-0.79,153.78,61.0,9891.76,-153.78,164.5,52.0,8640.68,0.5,43.0,5187.28,120.63,9.0,1251.08,3.18


In [23]:
df_sim.shape

(23856, 121)

In [24]:
# add gap to LYA columns (by segment)
# must be done AFTER NONTRN cols added
df_sim["RoomsGapToLYA"] = df_sim.LYA_RoomsSold - df_sim.RoomsOTB
df_sim["RevGapToLYA"] = df_sim.LYA_RoomRev - df_sim.RevOTB
df_sim["ADR_GapToLYA"] = df_sim.LYA_ADR - df_sim.ADR_OTB

df_sim["TRN_RoomsGapToLYA"] = df_sim.LYA_TRN_RoomsSold - df_sim.TRN_RoomsOTB
df_sim["TRN_RevGapToLYA"] = df_sim.LYA_TRN_RoomRev - df_sim.TRN_RevOTB
df_sim["TRN_ADR_GapToLYA"] = df_sim.LYA_TRN_ADR - df_sim.TRN_ADR_OTB

df_sim["NONTRN_RoomsGapToLYA"] = df_sim["RoomsGapToLYA"] - df_sim["TRN_RoomsGapToLYA"]
df_sim["NONTRN_RevGapToLYA"] = df_sim["RevGapToLYA"] - df_sim["TRN_RevGapToLYA"]
df_sim["NONTRN_ADR_GapToLYA"] = df_sim["ADR_GapToLYA"] - df_sim["TRN_ADR_GapToLYA"]

In [25]:
df_sim.shape

(23856, 130)

In [26]:
df_sim.columns

Index(['DOW', 'RoomsOTB', 'RevOTB', 'CxlForecast', 'TRN_RoomsOTB',
       'TRN_RevOTB', 'TRN_CxlForecast', 'TRNP_RoomsOTB', 'TRNP_RevOTB',
       'TRNP_CxlForecast',
       ...
       'TM05_NONTRN_ADR_Pickup', 'RoomsGapToLYA', 'RevGapToLYA',
       'ADR_GapToLYA', 'TRN_RoomsGapToLYA', 'TRN_RevGapToLYA',
       'TRN_ADR_GapToLYA', 'NONTRN_RoomsGapToLYA', 'NONTRN_RevGapToLYA',
       'NONTRN_ADR_GapToLYA'],
      dtype='object', length=130)

In [27]:
# remove all non-gap, non-pickup actual/tminus columns
# I will want to move this down in our script to combine with removing stly cols (we only want pace)
# removing them here just to make it cleaner

df_sim.drop(columns=drop_cols_agg, inplace=True, errors='ignore')
df_sim.columns

Index(['DOW', 'RoomsOTB', 'RevOTB', 'CxlForecast', 'TRN_RoomsOTB',
       'TRN_RevOTB', 'TRN_CxlForecast', 'StayDate', 'STLY_StayDate',
       'DaysUntilArrival', 'Realized_Cxls', 'SellingPrice', 'WE', 'WD',
       'ADR_OTB', 'AsOfDate', 'STLY_AsOfDate', 'RemSupply', 'Mon', 'Sat',
       'Sun', 'Thu', 'Tue', 'Wed', 'NONTRN_RoomsOTB', 'NONTRN_RevOTB',
       'NONTRN_ADR_OTB', 'NONTRN_CxlForecast', 'ACTUAL_RoomsSold',
       'ACTUAL_ADR', 'ACTUAL_RoomRev', 'ACTUAL_NumCancels', 'TRN_ADR_OTB',
       'TM30_RoomsPickup', 'TM30_RevPickup', 'TM30_ADR_Pickup',
       'TM30_TRN_RoomsPickup', 'TM30_TRN_RevPickup', 'TM30_TRN_ADR_Pickup',
       'TM30_NONTRN_RoomsPickup', 'TM30_NONTRN_RevPickup',
       'TM30_NONTRN_ADR_Pickup', 'TM15_RoomsPickup', 'TM15_RevPickup',
       'TM15_ADR_Pickup', 'TM15_TRN_RoomsPickup', 'TM15_TRN_RevPickup',
       'TM15_TRN_ADR_Pickup', 'TM15_NONTRN_RoomsPickup',
       'TM15_NONTRN_RevPickup', 'TM15_NONTRN_ADR_Pickup', 'TM05_RoomsPickup',
       'TM05_RevPickup', 'TM

In [28]:
df_sim.sample(random_state=0)

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,TRN_ADR_OTB,TM30_RoomsPickup,TM30_RevPickup,TM30_ADR_Pickup,TM30_TRN_RoomsPickup,TM30_TRN_RevPickup,TM30_TRN_ADR_Pickup,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_Pickup,TM15_RoomsPickup,TM15_RevPickup,TM15_ADR_Pickup,TM15_TRN_RoomsPickup,TM15_TRN_RevPickup,TM15_TRN_ADR_Pickup,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_Pickup,TM05_RoomsPickup,TM05_RevPickup,TM05_ADR_Pickup,TM05_TRN_RoomsPickup,TM05_TRN_RevPickup,TM05_TRN_ADR_Pickup,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_Pickup,RoomsGapToLYA,RevGapToLYA,ADR_GapToLYA,TRN_RoomsGapToLYA,TRN_RevGapToLYA,TRN_ADR_GapToLYA,NONTRN_RoomsGapToLYA,NONTRN_RevGapToLYA,NONTRN_ADR_GapToLYA
2016-02-09,Tue,69.0,3270.47,18.0,38.0,1828.54,3.0,2016-02-09,2015-02-10,26,0.0,49.26,False,False,47.4,2016-01-14,2015-01-15,136,0,0,0,0,1,0,31.0,1441.93,46.51,15.0,93.0,48.78,4536.95,31.0,48.0,14.0,628.9,-0.63,11.0,558.9,0.98,3.0,70.0,-2.49,3.0,29.97,-1.7,-7.0,-457.6,-2.8,10.0,487.57,1.06,-10.0,-577.2,-1.3,-23.0,-1290.77,-3.14,13.0,713.57,6.05,-69.0,-3270.47,-47.4,-38.0,-1828.54,-48.0,-31.0,-1441.93,0.6


In [29]:
df_sim.loc["2016-04-24"]

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,TRN_ADR_OTB,TM30_RoomsPickup,TM30_RevPickup,TM30_ADR_Pickup,TM30_TRN_RoomsPickup,TM30_TRN_RevPickup,TM30_TRN_ADR_Pickup,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_Pickup,TM15_RoomsPickup,TM15_RevPickup,TM15_ADR_Pickup,TM15_TRN_RoomsPickup,TM15_TRN_RevPickup,TM15_TRN_ADR_Pickup,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_Pickup,TM05_RoomsPickup,TM05_RevPickup,TM05_ADR_Pickup,TM05_TRN_RoomsPickup,TM05_TRN_RevPickup,TM05_TRN_ADR_Pickup,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_Pickup,RoomsGapToLYA,RevGapToLYA,ADR_GapToLYA,TRN_RoomsGapToLYA,TRN_RevGapToLYA,TRN_ADR_GapToLYA,NONTRN_RoomsGapToLYA,NONTRN_RevGapToLYA,NONTRN_ADR_GapToLYA
2016-04-24,Sun,168.0,11749.08,11.0,104.0,7374.64,10.0,2016-04-24,2015-04-26,31,0.0,73.6,False,False,69.94,2016-03-24,2015-03-26,30,0,0,1,0,0,0,64.0,4374.44,68.35,1.0,183.0,73.97,13536.63,111.0,71.0,0.0,0.0,0.0,0.0,-0.0,0.09,0.0,1.818989e-12,0.0,-10.0,-1137.6,-2.46,-7.0,-780.6,-2.47,-3.0,-357.0,-2.27,-15.0,-1667.05,-3.37,-11.0,-1212.05,-3.67,-4.0,-455.0,-2.67,-168.0,-11749.08,-69.94,-104.0,-7374.64,-71.0,-64.0,-4374.44,1.06
2016-04-24,Sun,168.0,11749.08,18.0,104.0,7374.64,17.0,2016-04-24,2015-04-26,30,0.0,73.6,False,False,69.94,2016-03-25,2015-03-27,37,0,0,1,0,0,0,64.0,4374.44,68.35,1.0,183.0,73.97,13536.63,111.0,71.0,0.0,0.0,0.0,0.0,-0.0,0.09,0.0,1.818989e-12,0.0,-10.0,-1137.6,-2.46,-7.0,-780.6,-2.47,-3.0,-357.0,-2.27,-15.0,-1667.05,-3.37,-11.0,-1212.05,-3.67,-4.0,-455.0,-2.67,-168.0,-11749.08,-69.94,-104.0,-7374.64,-71.0,-64.0,-4374.44,1.06
2016-04-24,Sun,170.0,11970.08,9.0,106.0,7595.64,8.0,2016-04-24,2015-04-26,29,0.0,73.6,False,False,70.41,2016-03-26,2015-03-28,26,0,0,1,0,0,0,64.0,4374.44,68.35,1.0,183.0,73.97,13536.63,111.0,72.0,2.0,221.0,0.47,2.0,221.0,1.09,0.0,3.637979e-12,0.0,-8.0,-916.6,-1.99,-5.0,-559.6,-1.47,-3.0,-357.0,-2.27,-13.0,-1446.05,-2.9,-9.0,-991.05,-2.67,-4.0,-455.0,-2.67,-170.0,-11970.08,-70.41,-106.0,-7595.64,-72.0,-64.0,-4374.44,1.59
2016-04-24,Sun,174.0,12318.23,15.0,110.0,7943.79,14.0,2016-04-24,2015-04-26,28,0.0,73.6,False,False,70.79,2016-03-27,2015-03-29,28,0,0,1,0,0,0,64.0,4374.44,68.35,1.0,183.0,73.97,13536.63,111.0,72.0,6.0,569.15,0.85,6.0,569.15,1.09,0.0,9.094947e-13,0.0,-4.0,-568.45,-1.61,-1.0,-211.45,-1.47,-3.0,-357.0,-2.27,-9.0,-1097.9,-2.52,-5.0,-642.9,-2.67,-4.0,-455.0,-2.67,-174.0,-12318.23,-70.79,-110.0,-7943.79,-72.0,-64.0,-4374.44,1.21
2016-04-24,Sun,176.0,12550.23,16.0,109.0,7818.79,15.0,2016-04-24,2015-04-26,27,0.0,73.6,False,False,71.31,2016-03-28,2015-03-30,27,0,0,1,0,0,0,67.0,4731.44,70.62,1.0,183.0,73.97,13536.63,111.0,72.0,8.0,801.15,1.37,5.0,444.15,1.09,3.0,357.0,2.27,-2.0,-336.45,-1.09,-2.0,-336.45,-1.47,0.0,0.0,0.0,-7.0,-865.9,-2.0,-6.0,-767.9,-2.67,-1.0,-98.0,-0.4,-176.0,-12550.23,-71.31,-109.0,-7818.79,-72.0,-67.0,-4731.44,0.69
2016-04-24,Sun,176.0,12550.23,25.0,109.0,7818.79,24.0,2016-04-24,2015-04-26,26,0.0,73.6,False,False,71.31,2016-03-29,2015-03-31,36,0,0,1,0,0,0,67.0,4731.44,70.62,1.0,183.0,73.97,13536.63,111.0,72.0,8.0,801.15,1.37,5.0,444.15,1.09,3.0,357.0,2.27,-2.0,-336.45,-1.09,-2.0,-336.45,-1.47,0.0,0.0,0.0,-7.0,-865.9,-2.0,-6.0,-767.9,-2.67,-1.0,-98.0,-0.4,-176.0,-12550.23,-71.31,-109.0,-7818.79,-72.0,-67.0,-4731.44,0.69
2016-04-24,Sun,176.0,12550.23,18.0,109.0,7818.79,17.0,2016-04-24,2015-04-26,25,0.0,73.6,False,False,71.31,2016-03-30,2015-04-01,29,0,0,1,0,0,0,67.0,4731.44,70.62,1.0,183.0,73.97,13536.63,111.0,72.0,8.0,801.15,1.37,5.0,444.15,1.09,3.0,357.0,2.27,-2.0,-336.45,-1.09,-2.0,-336.45,-1.47,0.0,0.0,0.0,-7.0,-865.9,-2.0,-6.0,-767.9,-2.67,-1.0,-98.0,-0.4,-176.0,-12550.23,-71.31,-109.0,-7818.79,-72.0,-67.0,-4731.44,0.69
2016-04-24,Sun,175.0,12478.73,22.0,108.0,7747.29,21.0,2016-04-24,2015-04-26,24,0.0,73.6,False,False,71.31,2016-03-31,2015-04-02,34,0,0,1,0,0,0,67.0,4731.44,70.62,1.0,183.0,73.97,13536.63,111.0,72.0,7.0,729.65,1.37,4.0,372.65,1.09,3.0,357.0,2.27,-3.0,-407.95,-1.09,-3.0,-407.95,-1.47,0.0,0.0,0.0,-8.0,-937.4,-2.0,-7.0,-839.4,-2.67,-1.0,-98.0,-0.4,-175.0,-12478.73,-71.31,-108.0,-7747.29,-72.0,-67.0,-4731.44,0.69
2016-04-24,Sun,175.0,12500.9,22.0,108.0,7769.46,20.0,2016-04-24,2015-04-26,23,0.0,73.6,False,False,71.43,2016-04-01,2015-04-03,34,0,0,1,0,0,0,67.0,4731.44,70.62,2.0,183.0,73.97,13536.63,111.0,72.0,7.0,751.82,1.49,4.0,394.82,1.09,3.0,357.0,2.27,-3.0,-385.78,-0.97,-3.0,-385.78,-1.47,0.0,9.094947e-13,0.0,-8.0,-915.23,-1.88,-7.0,-817.23,-2.67,-1.0,-98.0,-0.4,-175.0,-12500.9,-71.43,-108.0,-7769.46,-72.0,-67.0,-4731.44,0.57
2016-04-24,Sun,176.0,12662.16,16.0,109.0,7930.72,15.0,2016-04-24,2015-04-26,22,0.0,73.6,False,False,71.94,2016-04-02,2015-04-04,27,0,0,1,0,0,0,67.0,4731.44,70.62,1.0,183.0,73.97,13536.63,111.0,73.0,8.0,913.08,2.0,5.0,556.08,2.09,3.0,357.0,2.27,-2.0,-224.52,-0.46,-2.0,-224.52,-0.47,0.0,9.094947e-13,0.0,-7.0,-753.97,-1.37,-6.0,-655.97,-1.67,-1.0,-98.0,-0.4,-176.0,-12662.16,-71.94,-109.0,-7930.72,-73.0,-67.0,-4731.44,1.06


# EW- NEXT STEPS (THU 5PM)

1. drop unneeded, post-processed TM_nn columns in blank cell above (create list in agg_utils.py)
2. pull stly cols via merge below
3. calculate pace
4. drop unneeded, post-processed stly cols
5. add all of the steps in this notebook to agg.py
6. pull features from list at top of this NB
7. train/test split
8. linear regression (predict RoomsSold)
9. randomForest (predict RoomsSold)

**Time to pull STLY columns. I will accomplish this by merging df_sim on top of itself and pulling the below columns into the next year's row with the `'STLY_'` prefix.***

But before we do that, let's make sure we add in the ADR columns.

NEVERMIND - THIS STEP NEEDS TO COME LAST ONCE WE HAVE ALL OF THE OTHER COLUMNS

In [30]:
df_sim.head(2)

Unnamed: 0,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,TRN_ADR_OTB,TM30_RoomsPickup,TM30_RevPickup,TM30_ADR_Pickup,TM30_TRN_RoomsPickup,TM30_TRN_RevPickup,TM30_TRN_ADR_Pickup,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_Pickup,TM15_RoomsPickup,TM15_RevPickup,TM15_ADR_Pickup,TM15_TRN_RoomsPickup,TM15_TRN_RevPickup,TM15_TRN_ADR_Pickup,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_Pickup,TM05_RoomsPickup,TM05_RevPickup,TM05_ADR_Pickup,TM05_TRN_RoomsPickup,TM05_TRN_RevPickup,TM05_TRN_ADR_Pickup,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_Pickup,RoomsGapToLYA,RevGapToLYA,ADR_GapToLYA,TRN_RoomsGapToLYA,TRN_RevGapToLYA,TRN_ADR_GapToLYA,NONTRN_RoomsGapToLYA,NONTRN_RevGapToLYA,NONTRN_ADR_GapToLYA
2015-08-02,Sun,236.0,34425.02,22.0,187.0,28680.27,21.0,2015-08-02,2014-08-03,0,68.0,153.37,0,0,0.0,2015-08-02,2014-08-03,-27,0,0,1,0,0,0,49.0,5744.75,117.24,1.0,168.0,144.92,24346.11,68.0,153.0,75.0,11762.16,-140.76,68.0,10930.99,3.85,7.0,831.17,0.25,75.0,11501.47,-142.38,67.0,10563.3,2.03,8.0,938.17,0.01,74.0,11169.78,-143.55,64.0,9910.63,0.4,10.0,1259.15,2.22,-236.0,-34425.02,0.0,-187.0,-28680.27,-153.0,-49.0,-5744.75,153.0
2015-08-03,Mon,232.0,34800.06,26.0,175.0,28062.25,25.0,2015-08-03,2014-08-04,1,54.0,160.36,0,0,0.0,2015-08-02,2014-08-03,-19,1,0,0,0,0,0,57.0,6737.81,118.21,1.0,178.0,149.32,26579.6,54.0,160.0,60.0,9801.18,-145.34,51.0,8642.46,3.39,9.0,1158.72,1.98,64.0,10227.49,-146.27,54.0,8945.77,2.01,10.0,1281.72,2.12,59.0,9087.75,-148.63,47.0,7485.05,-0.76,12.0,1602.7,4.1,-232.0,-34800.06,0.0,-175.0,-28062.25,-160.0,-57.0,-6737.81,160.0


In [31]:
# pull STLY columns with self-merge to STLY date

# first, we need to create unique ID col (id) for each as-of-date/stay-date combo
# then, we manipulate strings to add a stly_id column that we can use as right key for our merge

df_sim_ids = df_sim.AsOfDate.astype(str) + ' - ' + df_sim.StayDate.astype(str)
df_sim.insert(0, "id", df_sim_ids)

df_sim_stly_ids = df_sim.STLY_AsOfDate.astype(str) + ' - ' + df_sim.STLY_StayDate.astype(str)
df_sim.insert(1, "stly_id", df_sim_stly_ids)
df_sim.head()

Unnamed: 0,id,stly_id,DOW,RoomsOTB,RevOTB,CxlForecast,TRN_RoomsOTB,TRN_RevOTB,TRN_CxlForecast,StayDate,STLY_StayDate,DaysUntilArrival,Realized_Cxls,SellingPrice,WE,WD,ADR_OTB,AsOfDate,STLY_AsOfDate,RemSupply,Mon,Sat,Sun,Thu,Tue,Wed,NONTRN_RoomsOTB,NONTRN_RevOTB,NONTRN_ADR_OTB,NONTRN_CxlForecast,ACTUAL_RoomsSold,ACTUAL_ADR,ACTUAL_RoomRev,ACTUAL_NumCancels,TRN_ADR_OTB,TM30_RoomsPickup,TM30_RevPickup,TM30_ADR_Pickup,TM30_TRN_RoomsPickup,TM30_TRN_RevPickup,TM30_TRN_ADR_Pickup,TM30_NONTRN_RoomsPickup,TM30_NONTRN_RevPickup,TM30_NONTRN_ADR_Pickup,TM15_RoomsPickup,TM15_RevPickup,TM15_ADR_Pickup,TM15_TRN_RoomsPickup,TM15_TRN_RevPickup,TM15_TRN_ADR_Pickup,TM15_NONTRN_RoomsPickup,TM15_NONTRN_RevPickup,TM15_NONTRN_ADR_Pickup,TM05_RoomsPickup,TM05_RevPickup,TM05_ADR_Pickup,TM05_TRN_RoomsPickup,TM05_TRN_RevPickup,TM05_TRN_ADR_Pickup,TM05_NONTRN_RoomsPickup,TM05_NONTRN_RevPickup,TM05_NONTRN_ADR_Pickup,RoomsGapToLYA,RevGapToLYA,ADR_GapToLYA,TRN_RoomsGapToLYA,TRN_RevGapToLYA,TRN_ADR_GapToLYA,NONTRN_RoomsGapToLYA,NONTRN_RevGapToLYA,NONTRN_ADR_GapToLYA
2015-08-02,2015-08-02 - 2015-08-02,2014-08-03 - 2014-08-03,Sun,236.0,34425.02,22.0,187.0,28680.27,21.0,2015-08-02,2014-08-03,0,68.0,153.37,0,0,0.0,2015-08-02,2014-08-03,-27,0,0,1,0,0,0,49.0,5744.75,117.24,1.0,168.0,144.92,24346.11,68.0,153.0,75.0,11762.16,-140.76,68.0,10930.99,3.85,7.0,831.17,0.25,75.0,11501.47,-142.38,67.0,10563.3,2.03,8.0,938.17,0.01,74.0,11169.78,-143.55,64.0,9910.63,0.4,10.0,1259.15,2.22,-236.0,-34425.02,0.0,-187.0,-28680.27,-153.0,-49.0,-5744.75,153.0
2015-08-03,2015-08-02 - 2015-08-03,2014-08-03 - 2014-08-04,Mon,232.0,34800.06,26.0,175.0,28062.25,25.0,2015-08-03,2014-08-04,1,54.0,160.36,0,0,0.0,2015-08-02,2014-08-03,-19,1,0,0,0,0,0,57.0,6737.81,118.21,1.0,178.0,149.32,26579.6,54.0,160.0,60.0,9801.18,-145.34,51.0,8642.46,3.39,9.0,1158.72,1.98,64.0,10227.49,-146.27,54.0,8945.77,2.01,10.0,1281.72,2.12,59.0,9087.75,-148.63,47.0,7485.05,-0.76,12.0,1602.7,4.1,-232.0,-34800.06,0.0,-175.0,-28062.25,-160.0,-57.0,-6737.81,160.0
2015-08-04,2015-08-02 - 2015-08-04,2014-08-03 - 2014-08-05,Tue,240.0,36591.77,26.0,180.0,29365.14,25.0,2015-08-04,2014-08-05,2,57.0,163.14,0,0,0.0,2015-08-02,2014-08-03,-27,0,0,0,0,1,0,60.0,7226.63,120.44,1.0,182.0,151.57,27585.83,58.0,163.0,63.0,10690.16,-146.34,54.0,9306.56,3.8,9.0,1383.6,5.87,68.0,11295.07,-147.07,58.0,9788.47,2.54,10.0,1506.6,6.04,64.0,10192.64,-150.0,54.0,8826.06,-0.01,10.0,1366.58,3.24,-240.0,-36591.77,0.0,-180.0,-29365.14,-163.0,-60.0,-7226.63,163.0
2015-08-05,2015-08-02 - 2015-08-05,2014-08-03 - 2014-08-06,Wed,241.0,37252.33,28.0,187.0,30654.82,27.0,2015-08-05,2014-08-06,3,57.0,163.93,0,0,0.0,2015-08-02,2014-08-03,-26,0,0,0,0,0,1,54.0,6597.51,122.18,1.0,182.0,152.92,27831.73,59.0,164.0,67.0,11448.52,-148.3,59.0,10180.42,4.04,8.0,1268.1,6.32,67.0,11004.65,-150.85,59.0,9834.1,1.34,8.0,1170.55,4.2,67.0,10903.0,-151.43,58.0,9651.92,1.19,9.0,1251.08,3.37,-241.0,-37252.33,0.0,-187.0,-30654.82,-164.0,-54.0,-6597.51,164.0
2015-08-06,2015-08-02 - 2015-08-06,2014-08-03 - 2014-08-07,Thu,237.0,36957.08,30.0,185.0,30518.72,27.0,2015-08-06,2014-08-07,4,56.0,164.97,0,0,0.0,2015-08-02,2014-08-03,-20,0,0,0,1,0,0,52.0,6438.36,123.81,3.0,180.0,155.27,27947.92,57.0,165.0,68.0,11781.19,-148.97,60.0,10513.09,4.95,8.0,1268.1,6.3,63.0,10261.8,-153.42,56.0,9430.27,1.52,7.0,831.53,-0.79,61.0,9891.76,-153.78,52.0,8640.68,0.5,9.0,1251.08,3.18,-237.0,-36957.08,0.0,-185.0,-30518.72,-165.0,-52.0,-6438.36,165.0


In [41]:
df_sim["DayOfWeek"] df_sim.StayDate.map(lambda x: dt.datetime.strftime(x, format="%a"))

2015-08-02    Sun
2015-08-03    Mon
2015-08-04    Tue
2015-08-05    Wed
2015-08-06    Thu
             ... 
2017-08-30    Wed
2017-08-31    Thu
2017-08-30    Wed
2017-08-31    Thu
2017-08-31    Thu
Name: StayDate, Length: 23856, dtype: object

In [32]:
df_sim.shape

(23856, 71)

In [33]:
# self-join df_sim to pull stly stats using the above keys

df_sim = df_sim.merge(df_sim[stly_cols_agg], left_on='stly_id', right_on='id', suffixes=(None, "_STLY"))
df_sim.head(2)

KeyError: "['ACTUAL_TRN_RevPickup', 'ACTUAL_ADR_Pickup', 'ACTUAL_TRN_RoomsPickup', 'ACTUAL_TRN_ADR_Pickup', 'ACTUAL_RoomsPickup', 'ACTUAL_RevPickup'] not in index"

In [None]:
df_sim.shape

In [None]:
df_sim[['id', 'stly_id', 'AsOfDate', 'StayDate', 'AsOfDate_STLY', 'StayDate_STLY', 'RoomsOTB_STLY', 'RevOTB_STLY']]




In [None]:
len(df_sim[df_sim.AsOfDate_STLY.isna()])

In [None]:
len(df_sim.dropna())

In [None]:
[c for c in df_sim.columns if c[-5:] == '_STLY']

In [None]:
df_sim.shape

In [None]:
df_test_stly = pd.read_pickle("./sims/pickle/h1_sim_2016-08-02.pick")
df_test_stly.loc["2016-08-06"]