In [256]:
import pandas as pd
import numpy as np
import os
from datetime import timedelta, date
import sys

'''This script produces a file which shows availability. Each row is a booking
(look) date, and each col is a (room type, arrival week, day of week, stay length) tuple'''

# Booking / cancellation records. The three date columns are parsed to
# datetimes and group_id becomes the index.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0 and is not needed to parse the date columns.
df_data = pd.read_csv('../../../data/cabot_data/clean_base/2018_clean.csv',
                      parse_dates=['LOOK_DATE', 'ARRIVAL', 'DEPART'],
                      index_col='group_id')
# Capacity table indexed by room type (UNIT); its CAPACITY column is read
# when the availability frame is initialised.
df_caps = pd.read_csv('../../../data/cabot_data/clean_base/capacities.csv', index_col='UNIT')

In [246]:
# stay length = whole days between ARRIVAL and DEPART
df_data['stay_len'] = (df_data['DEPART'] - df_data['ARRIVAL']).dt.days
# cap at 4 so that every stay of 4 days or more shares one value
df_data['stay_len_clean'] = df_data['stay_len'].clip(upper=4)

In [247]:
# helper vars
# daily range spanning the earliest to the latest LOOK_DATE
look_start = df_data['LOOK_DATE'].min()
look_end = df_data['LOOK_DATE'].max()
look_range = pd.date_range(look_start, look_end)

# daily range spanning the earliest to the latest ARRIVAL
ssn_start = df_data['ARRIVAL'].min()
ssn_end = df_data['ARRIVAL'].max()
ssn_range = pd.date_range(ssn_start, ssn_end)

# get season week range: the distinct week numbers covered by ssn_range, in
# first-seen order. Timestamp.week is read day by day because the vectorised
# DatetimeIndex.week accessor was removed in pandas 2.0.
ssn_weeks = pd.Index([day.week for day in ssn_range]).drop_duplicates().tolist()

# get max periods per day from transactions script:
# the largest number of collapsed booking groups that share one
# (LOOK_DATE, ARRIVAL) pair, taken from the first remaining column's count
periods_per_day = (df_data[df_data['CANCEL_INDICATOR'] == 0] # look at bookings only
                   .drop(['CANCEL_INDICATOR'], axis=1)
                   .groupby('group_id').first() # collapse groups
                   .sort_values('LOOK_DATE')
                   .groupby(by=['LOOK_DATE', 'ARRIVAL']).count()
                   .max()
                   .iloc[0])  # explicit positional access; `[0]` on a labelled Series is deprecated
intraday_range = range(0, periods_per_day)

# list of days in week
week_days = [1, 2, 3, 4, 5, 6, 7]

# list of stay lengths (4 refers to 4 days or more)
stay_lens = [1, 2, 3, 4]

# room type list: the distinct UNIT values present in df_data
df_grouped_types = df_data.groupby(by='UNIT').count()
unit_list = df_grouped_types.index.tolist()

In [248]:
# initializing blank avail df
# One row per look date and one column per (UNIT, week, dow, stay_len)
# combination. The frame is created directly as float zeros: building it empty
# and calling fillna(0.0) afterwards starts from object dtype and depends on
# fillna downcasting the result.
df_avail = pd.DataFrame(0.0,
                        index=look_range,
                        columns=pd.MultiIndex.from_product([unit_list, ssn_weeks, week_days, stay_lens],
                                                           names=['UNIT', 'week', 'dow', 'stay_len']))

# add default capacities: every column under a room type starts at that
# room type's CAPACITY
for unit, cap_row in df_caps.iterrows():
    df_avail[unit] = df_avail[unit] + cap_row['CAPACITY']

In [249]:
# helper load bar function
def progress(count, total, status=''):
    """Draw a one-line text progress bar on stdout.

    The line ends with a carriage return instead of a newline, so the next
    call overwrites the bar in place.

    count  -- number of items handled so far
    total  -- number of items expected overall; when it is not positive the
              bar is drawn empty at 0.0 percent instead of dividing by zero
    status -- short message printed after the percentage
    """
    bar_len = 60
    if total > 0:
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        # nothing to measure progress against
        filled_len = 0
        percents = 0.0

    # keep the bar exactly bar_len characters wide even if count is outside
    # the range [0, total]
    filled_len = min(bar_len, max(0, filled_len))
    bar = '=' * filled_len + '-' * (bar_len - filled_len)

    sys.stdout.write('[%s] %s%s ...%s\r' % (bar, percents, '%', status))
    sys.stdout.flush()
# row count of df_data, passed to progress() as the denominator
total = df_data.shape[0]

In [257]:
# Move group_id out of the index and back into a regular column. Guarded so
# that re-running this cell does not reset the index a second time and add a
# stray 'index' column.
if df_data.index.name == 'group_id':
    df_data = df_data.reset_index()

In [258]:
# preview the first rows instead of rendering the whole frame
df_data.head(10)

Unnamed: 0,group_id,UNIT,RESNO,ARRIVAL,DEPART,LOOK_DATE,CANCEL_INDICATOR,grp_size
0,269,4BV,10398O,2018-09-06,2018-09-09,2018-01-02,0,5
1,863,4BV,10398O,2018-09-06,2018-09-09,2018-01-02,0,4
2,2681,DKB,10398P,2018-09-27,2018-09-30,2018-01-02,0,2
3,269,4BV,10398Q,2018-09-06,2018-09-09,2018-01-02,0,5
4,2143,CK,10398R,2018-05-18,2018-05-21,2018-01-02,0,2
5,2143,CK,10398T,2018-05-18,2018-05-21,2018-01-02,0,2
6,70,4BV,10398U,2018-05-25,2018-05-29,2018-01-02,0,4
7,2710,DD,10398V,2018-06-21,2018-06-23,2018-01-02,0,2
8,2850,DD,10398V,2018-06-21,2018-06-23,2018-01-02,0,3
9,4477,DD,10398V,2018-06-21,2018-06-23,2018-01-02,0,3


In [250]:
# Number of df_data rows to apply. This cell previously iterated
# df_data.head(), i.e. the first 5 rows only; set to None to fill availability
# from every row.
max_rows = 5
rows_to_apply = df_data if max_rows is None else df_data.head(max_rows)
n_rows = len(rows_to_apply)

for position, (row_label, row) in enumerate(rows_to_apply.iterrows()):
    # report progress against the rows actually being processed
    progress(position, n_rows, status='Filling in availability')

    # each row moves capacity by 1/grp_size: a booking (CANCEL_INDICATOR == 0)
    # takes it away, any other row (a cancellation) gives it back
    cap_change = 1/row['grp_size']
    signed_change = -cap_change if row['CANCEL_INDICATOR'] == 0 else cap_change

    # helper vars
    unit = row['UNIT']
    week = row['ARRIVAL'].week
    dow = row['ARRIVAL'].dayofweek + 1
    stay_len = row['stay_len']

    # Columns touched by this stay: one (week, dow) pair per night, and for
    # each night every stay-length bucket up to the nights remaining, capped
    # at 4 because 4 is the largest stay_len column.
    # NOTE(review): a week_temp that is not one of the frame's week columns
    # (e.g. a stay running past the last season week) still raises KeyError
    # below, as it did before - confirm how such nights should be handled.
    affected_cols = []
    for n in range(0, stay_len):
        dow_temp = (n + dow - 1) % 7 + 1  # points to day of week
        week_temp = week + (n + dow - 1)//7  # moves week forward 1 if stay crosses over into next week
        num_lengths_affected = min(stay_len - n, 4)  # tracks which stay lengths are affected
        for length in range(0, num_lengths_affected):
            affected_cols.append((unit, week_temp, dow_temp, length + 1))

    # Adjust every look date from LOOK_DATE through ARRIVAL (both inclusive)
    # with one label slice per column instead of one scalar update per date.
    # Look dates that fall outside df_avail's index are simply not part of
    # the slice.
    for col in affected_cols:
        df_avail.loc[row['LOOK_DATE']:row['ARRIVAL'], col] += signed_change



KeyboardInterrupt: 

In [None]:
### testing code
# row = df_data.iloc[6,:]
# df_avail.loc[row['LOOK_DATE'], (row['UNIT'], 19, 1)]
# unit = row['UNIT']
# week = row['ARRIVAL'].week
# dow = row['ARRIVAL'].dayofweek + 1
# stay_len = row['stay_len']

# print("ARRIVE DOW", dow)

# for look_date in ba_delta:
#     for n in range(0, stay_len):
#         dow_temp = (n + dow-1) % 7 + 1 # points to day of week
#         week_temp = (week + (n + dow - 1)//7)  # moves week forward 1 if stay crosses over into next week
#         num_lengths_affected = max(stay_len - n, 4) # tracks which stay lengths are affected
#         for length in range(0, num_lengths_affected):
#             df_avail.loc[look_date, (unit, week_temp, dow_temp, length+1)] -= cap_change
