In [658]:
import pandas as pd
import numpy as np
import os
from datetime import timedelta, date
import sys

'''This script produces a file which shows availability. Each row is a
(booking/look date, arrival date, intraday period) combination, and each
column is a room type.'''

def daterange(start_date, end_date):
    """Yield each day from start_date up to, but not including, end_date.

    Nothing is yielded when end_date is on or before start_date.
    """
    span_days = (end_date - start_date).days
    offset = 0
    while offset < span_days:
        yield start_date + timedelta(days=offset)
        offset += 1

# Read the raw hotel transactions, parsing the three date columns up front,
# then order them by look date and reservation number. RESNO is only needed
# for that ordering, so it is dropped straight afterwards.
df_data = pd.read_csv(
    'data/2018_clean.csv',
    parse_dates=['ARRIVAL', 'DEPART', 'LOOK_DATE'],
)
df_data = df_data.sort_values(by=['LOOK_DATE', 'RESNO']).drop(columns=['RESNO'])

# Capacity table, indexed by UNIT.
df_caps = pd.read_csv('data/capacities.csv', index_col='UNIT')

In [659]:
# helper vars
# Daily range covering every LOOK_DATE present in the transactions.
look_start = df_data['LOOK_DATE'].min()
look_end = df_data['LOOK_DATE'].max()
look_range = pd.date_range(look_start, look_end)

# Daily range covering every ARRIVAL date present in the transactions.
ssn_start = df_data['ARRIVAL'].min()
ssn_end = df_data['ARRIVAL'].max()
ssn_range = pd.date_range(ssn_start, ssn_end)

# get max periods per day from transactions script
# After collapsing each group_id to a single row, count the rows per
# (LOOK_DATE, ARRIVAL) pair and keep the largest count found in the first
# remaining column.
periods_per_day = int(
    df_data[df_data['CANCEL_INDICATOR'] == 0]  # look at bookings only
    .drop(['CANCEL_INDICATOR'], axis=1)
    .groupby('group_id').first()  # collapse groups
    .sort_values('LOOK_DATE')
    .groupby(by=['LOOK_DATE', 'ARRIVAL']).count()
    .max()
    # Positional access must go through .iloc: `[0]` on a Series whose index
    # holds column labels is a deprecated positional fallback in pandas 2.x.
    .iloc[0]
)
# int() above turns the numpy integer into a plain int for range().
intraday_range = range(0, periods_per_day)


In [660]:
# Build the availability grid: one row per (LOOK_DATE, ARRIVAL, INTRADAY)
# combination and one column per room type, pre-loaded with each type's
# default capacity.

# generate room type list (groupby yields the distinct UNIT values, sorted)
df_grouped_types = df_data.groupby(by='UNIT').count()
unit_list = df_grouped_types.index.tolist()

# Create the grid filled with 0.0 directly. Constructing it empty and calling
# fillna() afterwards leaves object-dtype columns that then have to be
# converted.
df_avail = pd.DataFrame(
    0.0,
    index=pd.MultiIndex.from_product(
        [look_range, ssn_range, intraday_range],
        names=['LOOK_DATE', 'ARRIVAL', 'INTRADAY'],
    ),
    columns=unit_list,
)

# add default capacities
# The capacity table is loaded as `df_caps`; the previous `df_capa` was never
# defined in this notebook and only worked off leftover kernel state.
# A unit listed in the capacity table but absent from the transactions still
# raises KeyError here, as before.
for unit, capacity in df_caps['CAPACITY'].items():
    df_avail[unit] = df_avail[unit] + capacity

In [661]:
# Preview only the first rows; the grid has one row for every
# (LOOK_DATE, ARRIVAL, INTRADAY) combination, so showing it whole is noise.
df_avail.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,2BV,4BV,CD,CK,DD,DK,DKB
LOOK_DATE,ARRIVAL,INTRADAY,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-01-02,2018-05-11,0,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,1,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,2,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,3,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,4,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,5,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,6,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,7,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,8,8.0,4.0,13.0,17.0,24.0,6.0,12.0
2018-01-02,2018-05-11,9,8.0,4.0,13.0,17.0,24.0,6.0,12.0


In [662]:
# Scratch cell: take the second transaction and build the index keys used to
# address its cells in a throwaway copy of the availability grid.
row = df_data.iloc[1]
df_test = df_avail.copy()
cur_idx2 = (row['LOOK_DATE'], row['ARRIVAL'])
cur_idx = cur_idx2 + (1,)
# df_test.loc[cur_idx2, row['UNIT']]

In [663]:
# Number of transaction rows, i.e. how many iterations the fill loop will run.
len(df_data)

10919

In [664]:
# fill in capacities by iterating over transaction data

# helper load bar function
def progress(count, total, status=''):
    """Draw a single-line text progress bar on stdout.

    The line ends with a carriage return so the next call overwrites it.

    Args:
        count: Amount of work completed so far.
        total: Total amount of work. A value of 0 (or less) is reported as
            100% complete instead of raising ZeroDivisionError.
        status: Optional message appended after the percentage.
    """
    bar_len = 60

    if total <= 0:
        # Nothing to do counts as finished; also avoids dividing by zero.
        filled_len = bar_len
        percents = 100.0
    else:
        filled_len = int(round(bar_len * count / float(total)))
        # Keep the bar at its fixed width even if count strays outside
        # [0, total].
        filled_len = min(max(filled_len, 0), bar_len)
        percents = round(100.0 * count / float(total), 1)

    bar = '=' * filled_len + '-' * (bar_len - filled_len)

    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()
    
total = len(df_data)

# The index levels do not change inside the loop, so extract them once for
# the boolean masks built below.
look_level = df_avail.index.get_level_values('LOOK_DATE')
arrival_level = df_avail.index.get_level_values('ARRIVAL')

# enumerate() supplies the running position for the progress bar. The frame's
# own index labels are out of order after the sort_values() at load time, so
# feeding them to progress() made the bar jump around.
for position, (_, row) in enumerate(df_data.iterrows(), start=1):
    progress(position, total, status='Filling in availability')
    cap_change = 1/row['grp_size']

    # CANCEL_INDICATOR == 0 is a booking and takes capacity away; any other
    # value hands the same amount back.
    signed_change = -cap_change if row['CANCEL_INDICATOR'] == 0 else cap_change

    # Every look date from LOOK_DATE through ARRIVAL (inclusive), for this
    # arrival date, across all intraday periods. One masked update replaces
    # the per-day / per-period loops, and no loop variable named `date`
    # clobbers the imported datetime.date any more.
    # NOTE(review): dates that fall outside the grid are now skipped instead
    # of raising KeyError - confirm that is acceptable for this data.
    affected = (
        (arrival_level == row['ARRIVAL'])
        & (look_level >= row['LOOK_DATE'])
        & (look_level <= row['ARRIVAL'])
    )
    df_avail.loc[affected, row['UNIT']] += signed_change



In [665]:
# Write the filled availability grid to disk as CSV.
df_avail.to_csv('data/availability.csv')

In [667]:
# Lowest availability reached by each room type anywhere in the grid.
# NOTE(review): the recorded output has negative minima for several units and
# a tiny float residue (about -1.3e-15) for CD; presumably overbooked or
# inconsistent transactions plus accumulated 1/grp_size rounding - confirm.
df_avail.min()

2BV   -3.000000e+00
4BV   -3.000000e+00
CD    -1.332268e-15
CK     0.000000e+00
DD    -2.000000e+00
DK    -1.000000e+00
DKB    5.000000e-01
dtype: float64