In [1]:
# Shell escape: print the path of the `python` executable that the shell resolves first on PATH.
!which python

/Users/ahmadabdullahtariq/Documents/Projects/Thesis/.venv/bin/python


In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
# Load the preprocessed occupancy and price tables from their pickle files.
# NOTE(review): unpickling can execute arbitrary code — only load files produced by this project.
df_occ = pd.read_pickle('dataset/preprocessed/occupancy.pkl')
df_pri = pd.read_pickle('dataset/preprocessed/price.pkl')

In [3]:
# Restrict both tables to a single hotel. The id is named once so the two
# filters cannot drift apart when a different hotel is analysed.
HOTEL_ID = 284
df_occ = df_occ[df_occ['HotelId'] == HOTEL_ID]
df_pri = df_pri[df_pri['HotelId'] == HOTEL_ID]

In [4]:
# Keep one row per (target date, snapshot date) pair in each table; when a
# pair occurs more than once, the last occurrence is the one that is kept.
snapshot_key = ['TargetDate', 'SnapshotDate']
df_occ = df_occ.drop_duplicates(subset=snapshot_key, keep='last')
df_pri = df_pri.drop_duplicates(subset=snapshot_key, keep='last')

In [5]:
# Remove the LeadTime and HotelId columns from both tables.
dropped_columns = ['LeadTime', 'HotelId']
df_occ = df_occ.drop(dropped_columns, axis='columns')
df_pri = df_pri.drop(dropped_columns, axis='columns')

In [6]:
# Cast the Definitive and Rooms columns of the occupancy table to float in one call.
df_occ = df_occ.astype({'Definitive': 'float', 'Rooms': 'float'})

In [7]:
# Report the number of occupancy rows, then display the column dtypes as the cell output.
print("Length of Occupancy : ",len(df_occ))
df_occ.dtypes

Length of Occupancy :  79492


TargetDate      datetime64[ns]
Definitive             float64
Rooms                  float64
SnapshotDate    datetime64[ns]
dtype: object

In [8]:
# Report the number of price rows, then display the column dtypes as the cell output.
print("Length of Price : ",len(df_pri))
df_pri.dtypes

Length of Price :  53196


TargetDate      datetime64[ns]
LAR                    float64
SnapshotDate    datetime64[ns]
dtype: object

In [9]:
# Preview the first rows of the occupancy table.
df_occ.head()

Unnamed: 0,TargetDate,Definitive,Rooms,SnapshotDate
6513161,2018-01-01,58.0,290.0,2017-10-05
6513160,2018-01-01,58.0,290.0,2017-10-06
6513159,2018-01-01,58.0,290.0,2017-10-08
6513158,2018-01-01,63.0,290.0,2017-10-12
6513157,2018-01-01,62.0,290.0,2017-10-13


In [10]:
# Preview the first rows of the price table.
df_pri.head()

Unnamed: 0,TargetDate,LAR,SnapshotDate
2121884,2018-01-01,76.05,2017-11-07
2121888,2018-01-01,109.41,2017-11-13
2121891,2018-01-01,109.41,2017-11-14
2121893,2018-01-01,109.41,2017-11-16
2121897,2018-01-01,109.41,2017-11-17


In [11]:
# Outer-join occupancy and price on the (target date, snapshot date) pair, so
# snapshots that appear in only one of the two tables are still kept.
df = df_occ.merge(df_pri, on=['TargetDate', 'SnapshotDate'], how='outer')

In [12]:
# Order the merged table by target date and snapshot date, keep the last row
# of any repeated (target date, snapshot date) pair, and renumber the index.
df = (
    df.sort_values(by=['TargetDate', 'SnapshotDate'])
      .drop_duplicates(subset=['TargetDate', 'SnapshotDate'], keep='last')
      .reset_index(drop=True)
)

In [13]:
def plot_booking_curve(df):
    """Show room capacity and definitive bookings against snapshot date.

    Draws two traces over ``df['SnapshotDate']``: ``df['Rooms']`` as a plain
    line named 'Max Capacity', and ``df['Definitive']`` as a line with markers
    named 'ROH'. The figure is displayed immediately and nothing is returned.
    """
    snapshot_dates = df['SnapshotDate']
    capacity_trace = go.Scatter(x=snapshot_dates, y=df['Rooms'], mode='lines', name='Max Capacity')
    bookings_trace = go.Scatter(x=snapshot_dates, y=df['Definitive'], mode='lines+markers', name='ROH')
    go.Figure(data=[capacity_trace, bookings_trace]).show()

def plot_price(df):
    """Show ``df['LAR']`` against ``df['SnapshotDate']`` as a single line named 'Price'.

    The figure is displayed immediately and nothing is returned.
    """
    price_trace = go.Scatter(x=df['SnapshotDate'], y=df['LAR'], mode='lines', name='Price')
    go.Figure(data=[price_trace]).show()

def plot_definative(df):
    """Build a heatmap of definitive bookings by target date and snapshot date.

    The x axis carries ``df['TargetDate']`` (with a range slider), the y axis
    carries ``df['SnapshotDate']`` (drawn reversed), and the colour encodes
    ``df['Definitive']``.

    Returns:
        The plotly figure. It is not shown here; the caller displays it.
    """
    heatmap = go.Heatmap(
        x=df['TargetDate'],
        y=df['SnapshotDate'],
        z=df['Definitive'],
        colorscale='Viridis',
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(
        title='Definitive Values of Hotel',
        xaxis=dict(
            rangeslider=dict(visible=True),
            type="date",
            title_text="Target date",
        ),
        # The y values are snapshot dates, so the axis is labelled accordingly
        # (it previously claimed to show lead time in days).
        # NOTE(review): if lead time was the intended axis, plot
        # (TargetDate - SnapshotDate).dt.days as y instead — confirm.
        yaxis=dict(title_text='Snapshot date', autorange="reversed"),
    )
    return fig

In [29]:
# Select every snapshot recorded for the target date 2018-03-01.
is_target = df['TargetDate'] == '2018-03-01'
dx = df.loc[is_target]

In [30]:
# Show the booking curve and the price series for the selected target date.
plot_booking_curve(dx)
plot_price(dx)

## Imputation

In [20]:
# Reload the single-hotel table from disk, replacing the in-memory `df`.
# NOTE(review): no cell visible in this notebook writes 'dataset/preprocessed/284.pkl';
# presumably it is the merged frame built earlier, saved elsewhere — confirm, or add the
# matching `to_pickle` call so the notebook runs from a fresh kernel.
df = pd.read_pickle('dataset/preprocessed/284.pkl')

In [21]:
# Cast the Definitive and Rooms columns to float in one call, then display the dtypes.
df = df.astype({'Definitive': 'float', 'Rooms': 'float'})

df.dtypes

TargetDate      datetime64[ns]
Definitive             float64
Rooms                  float64
SnapshotDate    datetime64[ns]
LAR                    float64
dtype: object

In [22]:
# Summary statistics for the numeric columns.
# NOTE(review): in the recorded output LAR has fewer non-null values than the other columns,
# Rooms has a minimum of 0, and the Definitive maximum exceeds the Rooms maximum —
# worth checking before modelling.
df.describe()

Unnamed: 0,Definitive,Rooms,LAR
count,79492.0,79492.0,48586.0
mean,110.493446,289.671665,70.12701
std,73.368848,9.752462,31.304937
min,0.0,0.0,30.24
25%,50.0,290.0,50.4
50%,95.0,290.0,62.71
75%,158.0,290.0,76.31
max,301.0,290.0,891.61


In [47]:
# plot_definative returns the figure; as the cell's last expression it is rendered as the output.
plot_definative(df)

In [35]:
targetdates = df['TargetDate'].unique()
# Impute missing values separately for each target date: carry the last known
# value forward, then back-fill whatever is still missing at the start of the
# series. Rows are filled in their current order. One grouped pass replaces the
# earlier per-date loop, which built a boolean mask over the whole frame twice
# for every target date.
fill_cols = [col for col in df.columns if col != 'TargetDate']
filled = df.groupby('TargetDate', sort=False)[fill_cols].ffill()
filled = filled.groupby(df['TargetDate'], sort=False).bfill()
# Write only into cells that were missing, so existing values are never touched.
# A target date with no observation at all in a column keeps its NaN values.
df[fill_cols] = df[fill_cols].fillna(filled)

In [44]:
# Select every snapshot recorded for the target date 2018-02-07.
is_target = df['TargetDate'] == '2018-02-07'
dx = df.loc[is_target]

In [42]:
# List the rows whose LAR price is missing.
# NOTE(review): presumably these belong to target dates with no price observation at all,
# which forward/backward filling within a target date cannot fill — confirm.
df[df['LAR'].isna()]

Unnamed: 0,TargetDate,Definitive,Rooms,SnapshotDate,LAR
2959,2018-02-07,9.0,290.0,2017-11-10,
2960,2018-02-07,9.0,290.0,2017-11-11,
2961,2018-02-07,9.0,290.0,2017-11-12,
2962,2018-02-07,9.0,290.0,2017-11-13,
2963,2018-02-07,9.0,290.0,2017-11-14,
...,...,...,...,...,...
65602,2020-02-22,282.0,290.0,2020-02-18,
65603,2020-02-22,281.0,290.0,2020-02-19,
65604,2020-02-22,281.0,290.0,2020-02-20,
65605,2020-02-22,276.0,290.0,2020-02-21,


In [46]:
# Show the booking curve and the price series for the selected target date, as stored in `dx`.
plot_booking_curve(dx)
plot_price(dx)

In [45]:
# Draw the same two plots after a forward fill followed by a backward fill;
# the filled frame is computed once and shared by both plots.
dx_filled = dx.ffill().bfill()
plot_booking_curve(dx_filled)
plot_price(dx_filled)

In [50]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how='any')

In [51]:
# Count the missing values in each column; the recorded output shows zero for every column.
df.isna().sum()

TargetDate      0
Definitive      0
Rooms           0
SnapshotDate    0
LAR             0
dtype: int64

## Learning Algorithm

In [54]:
# Load the imputed single-hotel table from disk, replacing the in-memory `df`.
# NOTE(review): no cell visible in this notebook writes 'dataset/preprocessed/284_imputed.pkl';
# presumably it is the imputed frame from the previous section, saved elsewhere — confirm,
# or add the matching `to_pickle` call so the notebook runs from a fresh kernel.
df = pd.read_pickle('dataset/preprocessed/284_imputed.pkl')

In [56]:
# Preview the first rows of the imputed table.
df.head()

Unnamed: 0,TargetDate,Definitive,Rooms,SnapshotDate,LAR
0,2018-01-01,58.0,290.0,2017-10-05,76.05
1,2018-01-01,58.0,290.0,2017-10-06,76.05
2,2018-01-01,58.0,290.0,2017-10-08,76.05
3,2018-01-01,63.0,290.0,2017-10-12,76.05
4,2018-01-01,62.0,290.0,2017-10-13,76.05
