In [None]:
import os
import pandas as pd
import numpy as np
import datetime
from sklearn.preprocessing import OneHotEncoder as ohe

from dbds import generate_hotel_dfs
from sim import generate_simulation
from model_cancellations import get_otb_res, predict_cancellations
from agg_utils import stly_cols, ly_cols

# Raise pandas' display limits so wide/long frames are shown in full.
pd.set_option("display.max_rows", 150)
pd.set_option("display.max_columns", 250)

# Notebook-wide constants.
DATE_FMT = "%Y-%m-%d"
h1_capacity, h2_capacity = 187, 226
AOD = "2017-08-01"
AOD_dt = pd.to_datetime(AOD)

# Load the cached frames for both hotels.
# NOTE: unpickling can execute arbitrary code; only load pickle files you created yourself.
h1_res, h2_res = (
    pd.read_pickle(path) for path in ("pickle/h1_res.pick", "pickle/h2_res.pick")
)
h1_dbd, h2_dbd = (
    pd.read_pickle(path) for path in ("pickle/h1_dbd.pick", "pickle/h2_dbd.pick")
)

In [None]:
# Cache rebuild, kept commented out: the lines below regenerate the four frames
# from the raw CSVs with generate_hotel_dfs and write them to the pickle paths
# that the setup cell reads. Uncomment and run once when those files are
# missing or need to be refreshed.
# h1_res, h1_dbd = generate_hotel_dfs("../data/H1.csv", capacity=h1_capacity)
# h2_res, h2_dbd = generate_hotel_dfs("../data/H2.csv", capacity=h2_capacity)

# h1_res.to_pickle("pickle/h1_res.pick")
# h1_dbd.to_pickle("pickle/h1_dbd.pick")
# h2_res.to_pickle("pickle/h2_res.pick")
# h2_dbd.to_pickle("pickle/h2_dbd.pick")

In [None]:
# Model input columns. The list is assembled from themed groups; the groups are
# concatenated in exactly this order, so the resulting column order is fixed.
# NOTE(review): total-hotel TM30 pickups (TM30_RoomsPickup / TM30_ADR_Pickup /
# TM30_RevPickup) are not listed although their TM15/TM05 counterparts and the
# TM30 *_Pace columns are - confirm this is intentional.
features = (
    [  # current on-the-books state and pricing
        "DaysUntilArrival", "RoomsOTB", "RevOTB", "CxlForecast", "RemSupply",
        "SellingPrice", "STLY_SellingPrice",
        "TRN_RoomsOTB", "TRN_RevOTB", "TRN_CxlForecast",
    ]
    # last-year actuals
    + ["LYA_RoomsSold", "LYA_ADR", "LYA_RoomRev"]
    # calendar indicator columns
    + ["WE", "WD", "Mon", "Sat", "Sun", "Thu", "Tue", "Wed"]
    # gap between last-year actuals and the current position
    + ["RoomsGapToLYA", "ADR_GapToLYA"]
    # recent pickup per t-minus window
    + [
        prefix + metric
        for prefix in ("TM30_TRN_", "TM15_", "TM15_TRN_", "TM05_", "TM05_TRN_")
        for metric in ("RoomsPickup", "ADR_Pickup", "RevPickup")
    ]
    # on-the-books pace
    + [
        "RoomsOTB_Pace", "ADR_OTB_Pace", "RevOTB_Pace",
        "TRN_RoomsOTB_Pace", "TRN_ADR_OTB_Pace", "TRN_RevOTB_Pace",
    ]
    # pickup pace: total hotel first, then the TRN segment
    + [
        prefix + metric + "_Pace"
        for prefix in (
            "TM30_", "TM15_", "TM05_",
            "TM30_TRN_", "TM15_TRN_", "TM05_TRN_",
        )
        for metric in ("RoomsPickup", "ADR_Pickup", "RevPickup")
    ]
)

# Target column.
y_col = 'Actual_RoomsSold'

## Combine Files Generated by save_sims.py

In [None]:
# generate list of relevant files
import datetime as dt

hotel_num = 1
h = 'h' + str(hotel_num)  # file-name prefix of the selected hotel, e.g. "h1"
# A date object needs no parsing format, so none is passed.
SIM_AOD = pd.to_datetime(dt.date(2017, 8, 1))
SIM_START = SIM_AOD - pd.DateOffset(364) # need exactly 364 days of actuals, the rest future-looking

FOLDER = "./sims/pickle/"
first_date = dt.date(2016, 8, 1)  # not referenced elsewhere in this cell


def lam_include(x):
    """Return True when file name `x` should be loaded.

    A file qualifies when its first two characters equal the hotel prefix `h`
    and characters 7-16 parse to a date on or after SIM_START. Names whose
    date slice cannot be parsed are skipped instead of raising, so an
    unrelated file with the same prefix does not break the listing.
    """
    if x[:2] != h:
        return False
    file_date = pd.to_datetime(x[7:17], errors="coerce")  # NaT when unparseable
    return bool(file_date >= SIM_START)  # any comparison with NaT is False


h1_files = sorted(f for f in os.listdir(FOLDER) if lam_include(f))
if not h1_files:
    # Fail with an explicit message rather than an IndexError on the line below.
    raise FileNotFoundError(
        f"no '{h}' simulation files dated on/after {SIM_START.date()} found in {FOLDER}"
    )
len(h1_files), h1_files[0], h1_files[-1] # note STLY date of 8/1/17 == 8/2/16 (matching weekday)

In [None]:
%%time
# Read every selected pickle, then stack them with a single concat.
# DataFrame.append (used here before) was removed in pandas 2.0 and re-copied
# the growing frame on every iteration.
sim_frames = [pd.read_pickle(FOLDER + otb_data) for otb_data in h1_files]

# Row labels of the individual frames are kept (no ignore_index), exactly as
# append did; an empty file list still yields an empty DataFrame.
df_sim = pd.concat(sim_frames) if sim_frames else pd.DataFrame()

df_sim.shape

In [None]:
# Columns of the TRNP / GRP / CNT segments to remove from df_sim:
# first the RoomsOTB / RevOTB snapshots of every t-minus window,
# then the current RoomsOTB / RevOTB / CxlForecast columns.
drop_cols = [
    f"{tm}_{seg}_{metric}"
    for tm in ("TM05", "TM15", "TM30")
    for seg in ("TRNP", "GRP", "CNT")
    for metric in ("RoomsOTB", "RevOTB")
] + [
    f"{seg}_{metric}"
    for seg in ("TRNP", "GRP", "CNT")
    for metric in ("RoomsOTB", "RevOTB", "CxlForecast")
]

# errors='ignore' skips names that are not present, so the cell can be re-run.
df_sim.drop(columns=drop_cols, errors='ignore', inplace=True)
df_sim.columns

## Adding calculated features

In [None]:
# Add AsOfDate

def apply_aod(row):
    """Return the as-of date of a single row: `Date` minus `DaysUntilArrival` days.

    Row-wise reference implementation, kept for ad-hoc use; the cell itself
    computes the whole column with the vectorised expression below.
    """
    stay_date = pd.to_datetime(row["Date"])
    n_days_b4 = int(row["DaysUntilArrival"])
    return stay_date - pd.DateOffset(n_days_b4)


# Vectorised equivalent of applying apply_aod to every row.
aods = pd.to_datetime(df_sim["Date"]) - pd.to_timedelta(
    df_sim["DaysUntilArrival"].astype(int), unit="D"
)

# Re-runnable without a bare `except`: refresh the column when it already
# exists, otherwise insert it as the first column. Any other failure now
# surfaces instead of being silently swallowed.
if "AsOfDate" in df_sim.columns:
    df_sim["AsOfDate"] = aods
else:
    df_sim.insert(0, column='AsOfDate', value=aods, allow_duplicates=False)

df_sim.head()

In [None]:
# add remaining supply ('RemSupply')
# Take the capacity of the hotel selected via `hotel_num` from the notebook
# constants instead of repeating the literal 187, so the value follows the
# hotel whose files were loaded.
capacity = {1: h1_capacity, 2: h2_capacity}[hotel_num]
df_sim["RemSupply"] = (
    capacity - df_sim.RoomsOTB.astype(int) + df_sim.CxlForecast.astype(int)
)

In [None]:
# One-hot encode the DOW ('Day of Week') column.
# drop_first=True leaves out the first dummy level, which becomes the baseline.
ohe_dow = pd.get_dummies(df_sim["DOW"], drop_first=True)
dow_ohe_cols = ohe_dow.columns.tolist()

# Copy each indicator column onto df_sim under the same name.
for dow_col in dow_ohe_cols:
    df_sim[dow_col] = ohe_dow[dow_col]

In [None]:
# NONTRN columns: hotel total minus the TRN segment.
df_sim["NONTRN_RoomsOTB"] = df_sim["RoomsOTB"] - df_sim["TRN_RoomsOTB"]
df_sim["NONTRN_RevOTB"] = df_sim["RevOTB"] - df_sim["TRN_RevOTB"]

# ADR = revenue per room, rounded to 2 decimals.
nontrn_adr = df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"]
df_sim["NONTRN_ADR_OTB"] = nontrn_adr.round(2)

df_sim["NONTRN_CxlForecast"] = df_sim["CxlForecast"] - df_sim["TRN_CxlForecast"]

# Disabled: LYA equivalents built from the individual TRNP/GRP/CNT segments.
# df_sim["LYA_NONTRN_RoomsOTB"] = (
#     df_sim.LYA_TRNP_RoomsOTB + df_sim.LYA_GRP_RoomsOTB + df_sim.LYA_CNT_RoomsOTB
# )
# df_sim["LYA_NONTRN_RevOTB"] = df_sim.LYA_TRNP_RevOTB + df_sim.LYA_GRP_RevOTB + df_sim.LYA_CNT_RevOTB



In [None]:
# Add ADR by segment for OTB, LYA, STLY cols (required for pace comparisons)
# NOTE: this cell currently executes nothing. NONTRN_ADR_OTB is already computed
# in the NONTRN cell; the lines below are kept only as disabled reference code.


# df_sim["NONTRN_ADR_OTB"] = round(df_sim["NONTRN_RevOTB"] / df_sim["NONTRN_RoomsOTB"], 2)
# df_sim["LYA_NONTRN_ADR_OTB"] = round(df_sim["LYA_NONTRN_RevOTB"] / df_sim["LYA_NONTRN_RoomsOTB"], 2)

In [None]:
# Add last-year actual columns ("LYA_")

# Look the actuals up in the frame of the hotel selected via `hotel_num`
# (this was hard-wired to h1_dbd before, whatever hotel had been loaded).
hotel_dbd = {1: h1_dbd, 2: h2_dbd}[hotel_num]


def apply_ly_cols(row):
    """Return the `ly_cols` values of `hotel_dbd` for the row's STLY_Date, as a tuple.

    `row` only needs to support `row["STLY_Date"]`; that value is formatted
    with DATE_FMT and used as the `.loc` label into `hotel_dbd`.
    """
    stly_date_str = row["STLY_Date"].strftime(DATE_FMT)
    return tuple(hotel_dbd.loc[stly_date_str, ly_cols])


ly_new_cols = ["LYA_" + col for col in ly_cols]

# Many rows share the same STLY_Date, so do one lookup per distinct date and
# broadcast the result instead of running a .loc lookup for every row.
lya_by_date = {
    stly_date: apply_ly_cols({"STLY_Date": stly_date})
    for stly_date in df_sim["STLY_Date"].drop_duplicates()
}
df_sim[ly_new_cols] = pd.DataFrame(
    [lya_by_date[stly_date] for stly_date in df_sim["STLY_Date"]],
    index=df_sim.index,
    columns=ly_new_cols,
)

# Replaces every missing value in the whole frame (not only the new columns) with 0.
df_sim.fillna(0, inplace=True)

# add gap to LYA column 
df_sim["RoomsGapToLYA"] = df_sim.LYA_RoomsSold - df_sim.RoomsOTB
df_sim["ADR_GapToLYA"] = df_sim.LYA_ADR - df_sim.ADR_OTB

df_sim.head()

# df_sim["TRN_ADR_OTB"] = round(df_sim["TRN_RevOTB"] / df_sim["TRN_RoomsOTB"], 2)
# df_sim["LYA_ADR"] = round(df_sim.RevOTB / df_sim.RoomsOTB, 2)
# df_sim["LYA_TRN_ADR"] = round(df_sim["LYA_TRN_RoomRev"] / df_sim["LYA_TRN_RoomsSold"], 2)


In [None]:
%%time
# get recent pickup (tminus) columns


# first need TRN_ADR
# Rounded to 2 decimals like every other ADR column in this notebook; it was
# rounded to whole units before, so the TRN ADR pickups below subtracted
# values of different precision.
df_sim["TRN_ADR_OTB"] = round(df_sim["TRN_RevOTB"] / df_sim["TRN_RoomsOTB"], 2)

# loop thru tminus windows (for total hotel & trn) & count bookings
tms = ["TM30_", "TM15_", "TM05_"]
segs = ["", "TRN_"] # "" for total hotel
# df_sim.drop(
#     columns=["TM30_Date", "TM15_Date", "TM05_Date"], inplace=True, errors="ignore"
# )

for tm in tms:
    for seg in segs:

        # add tm_seg_adr (revenue per room of the t-minus snapshot)
        df_sim[tm + seg + "ADR_OTB"] = round(df_sim[tm + seg + "RevOTB"] / df_sim[tm + seg + "RoomsOTB"], 2)
        # and now segmented stats: pickup = current value minus t-minus value
        df_sim[tm + seg + "RoomsPickup"] = round(
            df_sim[seg + "RoomsOTB"] - df_sim[tm + seg + "RoomsOTB"], 2
        )
        df_sim[tm + seg + "RevPickup"] = round(
            df_sim[seg + "RevOTB"] - df_sim[tm + seg + "RevOTB"], 2
        )
        df_sim[tm + seg + "ADR_Pickup"] = round(
            df_sim[seg + "ADR_OTB"] - df_sim[tm + seg + "ADR_OTB"], 2
        )


    # back to outside loop (iterating thru tms)
    # add TM_NONTRN_OTB (t-minus total minus t-minus TRN)
    df_sim[tm +  "NONTRN_RoomsOTB"] = (
        df_sim[tm + "RoomsOTB"]
        - df_sim[tm + "TRN_RoomsOTB"]
    )
    df_sim[tm + "NONTRN_RevOTB"] = (
        df_sim[tm + "RevOTB"]
        - df_sim[tm + "TRN_RevOTB"]
    )
    df_sim[tm + "NONTRN_ADR_OTB"] = round(
        df_sim[tm + "NONTRN_RevOTB"] / df_sim[tm + "NONTRN_RoomsOTB"], 2
    )
    # add TM_NONTRN_OTB Pickup (these are not rounded, unlike the pickups above)
    df_sim[tm +  "NONTRN_RoomsPickup"] = (
        df_sim["NONTRN_RoomsOTB"]
        - df_sim[tm + "NONTRN_RoomsOTB"]
    )
    df_sim[tm + "NONTRN_RevPickup"] = (
        df_sim["NONTRN_RevOTB"]
        - df_sim[tm + "NONTRN_RevOTB"]
    )
    df_sim[tm + "NONTRN_ADR_Pickup"] = (
        df_sim["NONTRN_ADR_OTB"]
        - df_sim[tm + "NONTRN_ADR_OTB"]
    )

df_sim.head()

In [None]:
# Show the column index of df_sim as it stands after the pickup cell.
df_sim.columns

**Time to pull the STLY columns. I will do this by merging df_sim onto itself and pulling the columns below into the following year's row with the `'STLY_'` prefix.**

But before we do that, let's make sure we add in the ADR columns.

NEVERMIND - THIS STEP NEEDS TO COME LAST ONCE WE HAVE ALL OF THE OTHER COLUMNS

In [None]:
# Columns to pull from the STLY row via the self-merge on the STLY date.
# NOTE(review): this rebinds `stly_cols`, a name the imports cell also brings in
# from agg_utils - confirm the shadowing is intended.
# NOTE(review): the TRNP_/GRP_/CNT_ entries are also listed in drop_cols, so they
# are no longer in df_sim once the drop cell has run.
stly_cols = [
    # current on-the-books columns
    'RoomsOTB', 'ADR_OTB', 'RevOTB', 'CxlForecast',
    'TRN_RoomsOTB', 'TRN_RevOTB', 'TRN_CxlForecast',
    'TRNP_RoomsOTB', 'TRNP_RevOTB', 'TRNP_CxlForecast',
    'GRP_RoomsOTB', 'GRP_RevOTB', 'GRP_CxlForecast',
    'CNT_RoomsOTB', 'CNT_RevOTB', 'CNT_CxlForecast',
] + [
    # RoomsOTB / RevOTB of every segment for each t-minus window
    f"{tm}_{seg}{metric}"
    for tm in ("TM30", "TM15", "TM05")
    for seg in ("", "TRN_", "TRNP_", "GRP_", "CNT_")
    for metric in ("RoomsOTB", "RevOTB")
]

# STLY non-TRN totals: sum of the TRNP, GRP and CNT segments.
stly_nontrn_rooms = (
    df_sim["STLY_TRNP_RoomsOTB"]
    + df_sim["STLY_GRP_RoomsOTB"]
    + df_sim["STLY_CNT_RoomsOTB"]
)
df_sim["STLY_NONTRN_RoomsOTB"] = stly_nontrn_rooms

stly_nontrn_rev = (
    df_sim["STLY_TRNP_RevOTB"]
    + df_sim["STLY_GRP_RevOTB"]
    + df_sim["STLY_CNT_RevOTB"]
)
df_sim["STLY_NONTRN_RevOTB"] = stly_nontrn_rev

In [None]:
# Self-merge: attach to every row the `stly_cols` values of the rows whose
# AsOfDate equals this row's STLY_Date, renamed with the 'STLY_' prefix.

# Only columns still present in df_sim can be selected; several entries of
# `stly_cols` are removed by the drop_cols cell, which made df_sim[stly_cols]
# raise a KeyError.
stly_source_cols = [col for col in stly_cols if col in df_sim.columns]

# The right-hand frame must carry the join key itself ('AsOfDate' is not part
# of `stly_cols`); add_prefix renames it to 'STLY_AsOfDate'.
stly_lookup = df_sim[["AsOfDate"] + stly_source_cols].add_prefix("STLY_")

# Store the result under its own name so the `stly_cols` list is not
# overwritten and the cell can be re-run.
# NOTE(review): AsOfDate is presumably not unique in df_sim, which makes this a
# many-to-many join; the key may also need the stay date - confirm before use.
# NOTE(review): if df_sim already holds a column with one of the new 'STLY_'
# names, pandas will add _x/_y suffixes - verify the resulting columns.
df_sim_stly = df_sim.merge(
    stly_lookup, left_on='STLY_Date', right_on='STLY_AsOfDate'
)

In [None]:
# Shape (rows, columns) of the hotel-2 reservations whose CustomerType is 'Contract'.
is_contract = h2_res['CustomerType'] == 'Contract'
h2_res.loc[is_contract].shape