# install libraries

In [None]:
!pip install --upgrade pip
!pip install "snowflake-connector-python[pandas]" "snowflake-snowpark-python[pandas]" snowflake-snowpark-python==1.9.0 numpy pandas matplotlib scikit-learn xgboost seaborn python-dateutil tqdm holidays faker
!pip install --upgrade --q snowflake-snowpark-python==1.9.0
!pip uninstall urllib3 -y
!pip install urllib3==1.26.15
!pip install fosforml==1.1.6

# Import helper libraries

In [21]:
from fosforml import *
from fosforml.constants import MLModelFlavours
from matplotlib import pyplot as plt
import pandas as pd
pd.set_option('display.max_columns', 500)
import seaborn as sns
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np
import warnings; warnings.simplefilter('ignore')
from joblib import dump, load
import requests
from tqdm import tqdm
import time
import calendar

from time import sleep
import configparser
from dateutil.relativedelta import relativedelta
import datetime
from dateutil.easter import easter
from scipy.optimize import minimize_scalar
from scipy.optimize import curve_fit

%matplotlib inline

# connect to snowflake

In [2]:
from fosforml.model_manager.snowflakesession import get_session
my_session = get_session()

In [3]:
table_name = 'BOOKINGS_TRANSFORMED'

In [4]:
data = my_session.sql("select * from {}".format(table_name))

In [5]:
data = data.to_pandas()
data

Unnamed: 0,HOTEL,IS_CANCELED,LEAD_TIME,ARRIVAL_DATE_YEAR,MONTH,ARRIVAL_DATE_WEEK_NUMBER,ARRIVAL_DATE_DAY_OF_MONTH,EXPECTED_ARRIVAL_DATE,RESERVATION_STATUS,RESERVATION_STATUS_DATE,TOTAL_STAY_NIGHTS,TALLY_DAYS,STAYS_IN_WEEKEND_NIGHTS,STAYS_IN_WEEK_NIGHTS,ADULTS,CHILDREN,BABIES,TOTAL_GUESTS,AVG_ROOMS_PER_NIGHT,TOTAL_ROOM_NIGHTS,MEAL,COUNTRY,MARKET_SEGMENT,DISTRIBUTION_CHANNEL,PREVIOUS_CANCELLATIONS,PREVIOUS_BOOKINGS_NOT_CANCELED,RESERVED_ROOM_TYPE,ASSIGNED_ROOM_TYPE,DEPOSIT_TYPE,DAYS_IN_WAITING_LIST,CUSTOMER_TYPE,ADR,ARRIVAL_DATE_TRANSFORMED,RESERVATION_STATUS_DATE_TRANSFORMED
0,City Hotel,0,226,2023,8,35,29,2023-08-29,Check.Out,2023-01-15,4,4.000,2,2,2,0,0,2,1,4,BB,FRA,Offline TA/TO,TA/TO,0,0,A,A,No Deposit,0,Transient.Party,70.00,2023-08-29,2023-01-15
1,Resort Hotel,1,210,2024,3,9,3,2024-03-03,Canceled,2023-08-06,7,0.183,2,5,2,0,0,2,1,7,HB,PRT,Offline TA/TO,TA/TO,0,0,E,E,No Deposit,0,Transient,90.00,2024-03-03,2023-08-06
2,City Hotel,1,127,2023,9,37,12,2023-09-12,Canceled,2023-05-08,3,0.100,2,1,2,0,0,2,1,3,SC,BRA,Online TA,TA/TO,0,0,A,A,No Deposit,0,Transient,79.20,2023-09-12,2023-05-08
3,Resort Hotel,0,93,2023,8,34,27,2023-08-27,Check.Out,2023-05-26,6,6.000,2,4,2,0,0,2,1,6,HB,AUT,Online TA,TA/TO,0,0,E,E,No Deposit,0,Transient,102.93,2023-08-27,2023-05-26
4,Resort Hotel,0,18,2023,9,35,1,2023-09-01,Check.Out,2023-08-14,1,1.000,0,1,2,0,0,2,1,1,BB,ESP,Online TA,TA/TO,0,0,E,E,No Deposit,0,Transient.Party,80.00,2023-09-01,2023-08-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109613,City Hotel,0,81,2023,8,35,30,2023-08-30,Check.Out,2023-06-10,3,3.000,2,1,2,0,0,2,1,3,BB,IRL,Direct,Direct,0,0,A,A,No Deposit,0,Transient,79.80,2023-08-30,2023-06-10
109614,City Hotel,0,18,2023,8,35,30,2023-08-30,Check.Out,2023-08-12,3,3.000,2,1,2,0,0,2,1,3,SC,BEL,Online TA,GDS,0,0,E,E,No Deposit,0,Transient,145.00,2023-08-30,2023-08-12
109615,City Hotel,0,47,2023,8,35,31,2023-08-31,Check.Out,2023-07-15,2,2.000,1,1,2,0,0,2,1,2,BB,PRT,Direct,Direct,0,0,E,E,No Deposit,0,Transient,135.44,2023-08-31,2023-07-15
109616,City Hotel,1,72,2023,10,42,19,2023-10-19,Canceled,2023-08-08,4,0.470,1,3,2,0,0,2,1,4,SC,FRA,Online TA,TA/TO,0,0,A,A,No Deposit,0,Transient,74.80,2023-10-19,2023-08-08


In [None]:
# snowflake.get_connection(connection_name="TTH_REV_OPT_CXN")
# data = get_dataframe("BOOKINGS_TRANSFORMED")
# data

In [6]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 109618 entries, 0 to 109617
Data columns (total 34 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   HOTEL                                109618 non-null  object 
 1   IS_CANCELED                          109618 non-null  int8   
 2   LEAD_TIME                            109618 non-null  int16  
 3   ARRIVAL_DATE_YEAR                    109618 non-null  int16  
 4   MONTH                                109618 non-null  int8   
 5   ARRIVAL_DATE_WEEK_NUMBER             109618 non-null  int8   
 6   ARRIVAL_DATE_DAY_OF_MONTH            109618 non-null  int8   
 7   EXPECTED_ARRIVAL_DATE                109618 non-null  object 
 8   RESERVATION_STATUS                   109618 non-null  object 
 9   RESERVATION_STATUS_DATE              109618 non-null  object 
 10  TOTAL_STAY_NIGHTS                    109618 non-null  int8   
 11  TALLY_DAYS   

In [7]:
# Normalise every column label to a lower-case string.
data.columns = [str(column_name).lower() for column_name in data.columns]

# Removing cancellations and no-shows (note: the cells below keep both City Hotel and Resort Hotel rows)


In [8]:
def update_values(row):
    """Return the room limit for a booking row.

    The limit is looked up from ``row['hotel']`` and ``row['reserved_room_type']``.
    Any hotel / room-type combination that is not listed below yields ``None``.
    """
    limits_by_hotel = {
        'City Hotel': {'A': 150, 'D': 80, 'E': 30},
        'Resort Hotel': {'A': 150, 'D': 75, 'E': 60},
    }
    hotel_limits = limits_by_hotel.get(row['hotel'])
    if hotel_limits is None:
        # Unknown hotel: no limit is defined.
        return None
    # Unknown room type for a known hotel also falls through to None.
    return hotel_limits.get(row['reserved_room_type'])

In [9]:
data['room_limit'] = data.apply(update_values, axis=1)
data

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,month,arrival_date_week_number,arrival_date_day_of_month,expected_arrival_date,reservation_status,reservation_status_date,total_stay_nights,tally_days,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,total_guests,avg_rooms_per_night,total_room_nights,meal,country,market_segment,distribution_channel,previous_cancellations,previous_bookings_not_canceled,reserved_room_type,assigned_room_type,deposit_type,days_in_waiting_list,customer_type,adr,arrival_date_transformed,reservation_status_date_transformed,room_limit
0,City Hotel,0,226,2023,8,35,29,2023-08-29,Check.Out,2023-01-15,4,4.000,2,2,2,0,0,2,1,4,BB,FRA,Offline TA/TO,TA/TO,0,0,A,A,No Deposit,0,Transient.Party,70.00,2023-08-29,2023-01-15,150
1,Resort Hotel,1,210,2024,3,9,3,2024-03-03,Canceled,2023-08-06,7,0.183,2,5,2,0,0,2,1,7,HB,PRT,Offline TA/TO,TA/TO,0,0,E,E,No Deposit,0,Transient,90.00,2024-03-03,2023-08-06,60
2,City Hotel,1,127,2023,9,37,12,2023-09-12,Canceled,2023-05-08,3,0.100,2,1,2,0,0,2,1,3,SC,BRA,Online TA,TA/TO,0,0,A,A,No Deposit,0,Transient,79.20,2023-09-12,2023-05-08,150
3,Resort Hotel,0,93,2023,8,34,27,2023-08-27,Check.Out,2023-05-26,6,6.000,2,4,2,0,0,2,1,6,HB,AUT,Online TA,TA/TO,0,0,E,E,No Deposit,0,Transient,102.93,2023-08-27,2023-05-26,60
4,Resort Hotel,0,18,2023,9,35,1,2023-09-01,Check.Out,2023-08-14,1,1.000,0,1,2,0,0,2,1,1,BB,ESP,Online TA,TA/TO,0,0,E,E,No Deposit,0,Transient.Party,80.00,2023-09-01,2023-08-14,60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109613,City Hotel,0,81,2023,8,35,30,2023-08-30,Check.Out,2023-06-10,3,3.000,2,1,2,0,0,2,1,3,BB,IRL,Direct,Direct,0,0,A,A,No Deposit,0,Transient,79.80,2023-08-30,2023-06-10,150
109614,City Hotel,0,18,2023,8,35,30,2023-08-30,Check.Out,2023-08-12,3,3.000,2,1,2,0,0,2,1,3,SC,BEL,Online TA,GDS,0,0,E,E,No Deposit,0,Transient,145.00,2023-08-30,2023-08-12,30
109615,City Hotel,0,47,2023,8,35,31,2023-08-31,Check.Out,2023-07-15,2,2.000,1,1,2,0,0,2,1,2,BB,PRT,Direct,Direct,0,0,E,E,No Deposit,0,Transient,135.44,2023-08-31,2023-07-15,30
109616,City Hotel,1,72,2023,10,42,19,2023-10-19,Canceled,2023-08-08,4,0.470,1,3,2,0,0,2,1,4,SC,FRA,Online TA,TA/TO,0,0,A,A,No Deposit,0,Transient,74.80,2023-10-19,2023-08-08,150


In [10]:
data.columns

Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year', 'month',
       'arrival_date_week_number', 'arrival_date_day_of_month',
       'expected_arrival_date', 'reservation_status',
       'reservation_status_date', 'total_stay_nights', 'tally_days',
       'stays_in_weekend_nights', 'stays_in_week_nights', 'adults', 'children',
       'babies', 'total_guests', 'avg_rooms_per_night', 'total_room_nights',
       'meal', 'country', 'market_segment', 'distribution_channel',
       'previous_cancellations', 'previous_bookings_not_canceled',
       'reserved_room_type', 'assigned_room_type', 'deposit_type',
       'days_in_waiting_list', 'customer_type', 'adr',
       'arrival_date_transformed', 'reservation_status_date_transformed',
       'room_limit'],
      dtype='object')

In [11]:
# Keep only bookings that were not canceled and are not no-shows.
# NOTE(review): other status labels in this table use '.' as a separator
# (e.g. 'Check.Out'), so the no-show label is presumably 'No.Show' rather than
# 'No-Show' — confirm against the data. Both spellings are excluded here.
no_show_labels = ['No-Show', 'No.Show']
df = data[(data['is_canceled'] == 0) & ~data['reservation_status'].isin(no_show_labels)]
df

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,month,arrival_date_week_number,arrival_date_day_of_month,expected_arrival_date,reservation_status,reservation_status_date,total_stay_nights,tally_days,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,total_guests,avg_rooms_per_night,total_room_nights,meal,country,market_segment,distribution_channel,previous_cancellations,previous_bookings_not_canceled,reserved_room_type,assigned_room_type,deposit_type,days_in_waiting_list,customer_type,adr,arrival_date_transformed,reservation_status_date_transformed,room_limit
0,City Hotel,0,226,2023,8,35,29,2023-08-29,Check.Out,2023-01-15,4,4.0,2,2,2,0,0,2,1,4,BB,FRA,Offline TA/TO,TA/TO,0,0,A,A,No Deposit,0,Transient.Party,70.00,2023-08-29,2023-01-15,150
3,Resort Hotel,0,93,2023,8,34,27,2023-08-27,Check.Out,2023-05-26,6,6.0,2,4,2,0,0,2,1,6,HB,AUT,Online TA,TA/TO,0,0,E,E,No Deposit,0,Transient,102.93,2023-08-27,2023-05-26,60
4,Resort Hotel,0,18,2023,9,35,1,2023-09-01,Check.Out,2023-08-14,1,1.0,0,1,2,0,0,2,1,1,BB,ESP,Online TA,TA/TO,0,0,E,E,No Deposit,0,Transient.Party,80.00,2023-09-01,2023-08-14,60
6,Resort Hotel,0,141,2023,8,33,19,2023-08-19,Check.Out,2023-03-31,14,14.0,4,10,2,0,0,2,1,14,HB,GBR,Online TA,TA/TO,0,0,D,D,No Deposit,0,Transient,81.66,2023-08-19,2023-03-31,75
12,Resort Hotel,0,46,2023,8,35,30,2023-08-30,Check.Out,2023-07-15,3,3.0,2,1,2,0,0,2,1,3,BB,PRT,Online TA,TA/TO,0,0,A,A,No Deposit,0,Transient,53.41,2023-08-30,2023-07-15,150
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
109607,Resort Hotel,0,46,2023,8,34,26,2023-08-26,Check.Out,2023-07-11,7,7.0,2,5,2,0,0,2,1,7,BB,GBR,Direct,Direct,0,0,A,A,No Deposit,0,Transient,43.89,2023-08-26,2023-07-11,150
109613,City Hotel,0,81,2023,8,35,30,2023-08-30,Check.Out,2023-06-10,3,3.0,2,1,2,0,0,2,1,3,BB,IRL,Direct,Direct,0,0,A,A,No Deposit,0,Transient,79.80,2023-08-30,2023-06-10,150
109614,City Hotel,0,18,2023,8,35,30,2023-08-30,Check.Out,2023-08-12,3,3.0,2,1,2,0,0,2,1,3,SC,BEL,Online TA,GDS,0,0,E,E,No Deposit,0,Transient,145.00,2023-08-30,2023-08-12,30
109615,City Hotel,0,47,2023,8,35,31,2023-08-31,Check.Out,2023-07-15,2,2.0,1,1,2,0,0,2,1,2,BB,PRT,Direct,Direct,0,0,E,E,No Deposit,0,Transient,135.44,2023-08-31,2023-07-15,30


In [12]:
df.groupby(['hotel','market_segment','reserved_room_type']).agg({'adr':'mean','reservation_status_date_transformed':'count'})


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,adr,reservation_status_date_transformed
hotel,market_segment,reserved_room_type,Unnamed: 3_level_1,Unnamed: 4_level_1
City Hotel,Aviation,A,96.503608,97
City Hotel,Aviation,D,107.713951,81
City Hotel,Aviation,E,159.25,2
City Hotel,Complementary,A,47.880952,21
City Hotel,Complementary,D,25.0,4
City Hotel,Complementary,E,55.0,2
City Hotel,Corporate,A,80.089309,2069
City Hotel,Corporate,D,100.68913,138
City Hotel,Corporate,E,114.887931,87
City Hotel,Direct,A,107.774828,3144


In [13]:
# Drop complimentary bookings and keep only the reserved room types A, D and E.
is_paid_segment = df['market_segment'] != 'Complementary'
is_modelled_room = df['reserved_room_type'].isin(['A', 'D', 'E'])
data = df[is_paid_segment & is_modelled_room]
data['reserved_room_type'].value_counts()


reserved_room_type
A    50832
D    12845
E     4483
Name: count, dtype: int64

In [None]:
data.info()


In [14]:
data_backup = data.copy()

In [15]:
hotels = data['hotel'].unique()
room_types = data['reserved_room_type'].unique()

In [16]:
# Total room nights per booking = week nights + weekend nights.
# `assign` returns a new frame, so the column is not written into a filtered
# slice of an earlier DataFrame (which is what triggers SettingWithCopyWarning).
data = data.assign(total_rns=data['stays_in_week_nights'] + data['stays_in_weekend_nights'])
data_to_transform = data[['hotel','reserved_room_type','arrival_date_transformed','total_rns','adr', 'room_limit']]
data_to_transform

Unnamed: 0,hotel,reserved_room_type,arrival_date_transformed,total_rns,adr,room_limit
0,City Hotel,A,2023-08-29,4,70.00,150
3,Resort Hotel,E,2023-08-27,6,102.93,60
4,Resort Hotel,E,2023-09-01,1,80.00,60
6,Resort Hotel,D,2023-08-19,14,81.66,75
12,Resort Hotel,A,2023-08-30,3,53.41,150
...,...,...,...,...,...,...
109607,Resort Hotel,A,2023-08-26,7,43.89,150
109613,City Hotel,A,2023-08-30,3,79.80,150
109614,City Hotel,E,2023-08-30,3,145.00,30
109615,City Hotel,E,2023-08-31,2,135.44,30


import os
from snowflake.snowpark.session import Session

# Connection settings are read from environment variables (lower-case names).
user = os.getenv("user")
warehouse = os.getenv("warehouse")
schema= os.getenv("schema")
database = os.getenv("database")
role =  os.getenv("role")
account =  os.getenv("account")
password= os.getenv("password")

# Fail fast with a readable message: os.getenv returns None for an unset variable,
# which would otherwise be formatted into statements such as 'use warehouse None;'.
required_settings = {"user": user, "password": password, "account": account,
                     "warehouse": warehouse, "database": database, "schema": schema}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise RuntimeError(
        "Missing Snowflake connection environment variables: " + ", ".join(missing_settings)
    )

connection_params = dict(user=user, 
                         password=password, 
                         account=account, 
                         warehouse=warehouse, 
                         database=database,
                         schema=schema, 
                         role=role)

session = Session.builder.configs(connection_params).create()

# Make the warehouse / database / schema from the environment the active context.
session.sql('use warehouse {};'.format(warehouse)).collect()

session.sql('use database {};'.format(database)).collect()

session.sql('use schema {}.{};'.format(database, schema)).collect()

# Create a new dataframe to store the data by stay date

In [18]:
# Expand every booking into one row per stay date.
# The per-booking frames are collected in a list and concatenated once at the end;
# calling pd.concat inside the loop re-copies all previously expanded rows each time.
stay_date_frames = []

for _, row in data.iterrows():
    num_stay_dates = row['total_rns']
    try:
        # Create a row for each stay date
        stay_date_frames.append(pd.DataFrame({
            'hotel': row['hotel'],
            'room_type': row['reserved_room_type'], 
            'arrival_date': pd.date_range(start=row['expected_arrival_date'], periods=num_stay_dates),
            'total_rns': 1,
            'adr': row['adr'],
            'room_limit': row['room_limit']
        }))
    except ValueError as e:
        print(f"Error processing booking for {row['hotel']} on {row['expected_arrival_date']} : {num_stay_dates} {e}")

# pd.concat rejects an empty list, so fall back to an empty frame when nothing was expanded.
expanded_df = pd.concat(stay_date_frames, ignore_index=True) if stay_date_frames else pd.DataFrame()

# Sort the final dataframe by date

In [19]:
expanded_df = expanded_df.sort_values('arrival_date')
expanded_df = expanded_df.reset_index(drop=True)
expanded_df

Unnamed: 0,hotel,room_type,arrival_date,total_rns,adr,room_limit
0,City Hotel,A,2022-05-01,1,80.00,150
1,Resort Hotel,D,2022-05-01,1,153.00,75
2,City Hotel,A,2022-05-01,1,101.50,150
3,City Hotel,A,2022-05-01,1,80.00,150
4,City Hotel,A,2022-05-01,1,101.50,150
...,...,...,...,...,...,...
232598,Resort Hotel,E,2024-07-11,1,153.57,60
232599,Resort Hotel,A,2024-07-12,1,99.06,150
232600,Resort Hotel,D,2024-07-12,1,112.80,75
232601,Resort Hotel,A,2024-07-13,1,99.06,150


In [20]:
expanded_df['adr']= np.round(expanded_df['adr'], 2)

expanded_df


Unnamed: 0,hotel,room_type,arrival_date,total_rns,adr,room_limit
0,City Hotel,A,2022-05-01,1,80.00,150
1,Resort Hotel,D,2022-05-01,1,153.00,75
2,City Hotel,A,2022-05-01,1,101.50,150
3,City Hotel,A,2022-05-01,1,80.00,150
4,City Hotel,A,2022-05-01,1,101.50,150
...,...,...,...,...,...,...
232598,Resort Hotel,E,2024-07-11,1,153.57,60
232599,Resort Hotel,A,2024-07-12,1,99.06,150
232600,Resort Hotel,D,2024-07-12,1,112.80,75
232601,Resort Hotel,A,2024-07-13,1,99.06,150


In [None]:
##check
expanded_df.head()

In [None]:
# Add dow, month to data.
# day_name()/month_name() return English names by default, whereas strftime('%A'/'%B')
# follows the process locale. The calendar frame built later in this notebook uses
# day_name()/month_name(), and these columns are join keys against it.
expanded_df['dow'] = expanded_df['arrival_date'].dt.day_name()
expanded_df['month'] = expanded_df['arrival_date'].dt.month_name()

In [None]:
##check
expanded_df.head()

In [None]:
expanded_df.shape

In [None]:
expanded_df.info()

In [None]:
# Step 1: room nights sold per stay date for each hotel / room type
# (room_limit is averaged, total_rns is summed).
daily_rns= expanded_df.groupby(['arrival_date','dow','month', 'hotel', 'room_type']).agg({'room_limit': 'mean', 'total_rns':'sum'}).reset_index() # get total stays per day

# Step 2: collapse the daily totals to one row per (dow, month, hotel, room_type)
# with the sum, mean and median of the daily room nights.
daily_rns = daily_rns.groupby(['dow','month', 'hotel', 'room_type']).agg({'room_limit': 'mean','total_rns':['sum','mean','median']}).reset_index() # get RNs metrics by DOW & month

# Flatten the two-level column index by joining both levels with '_'. Columns whose
# second level is empty therefore end with an underscore (e.g. 'dow_', 'month_');
# the merge further down joins on those names.
daily_rns.columns = ['_'.join(col) for col in daily_rns.columns] # remove multi-level columns
daily_rns

In [None]:
adr_frequency = expanded_df.groupby(['dow','month','adr', 'hotel', 'room_type']).agg({'room_limit': 'mean','total_rns':'sum'})
adr_frequency.reset_index(inplace=True)

In [None]:
adr_frequency

In [None]:
merged_df = pd.merge(adr_frequency, daily_rns,how='left',left_on=['dow','month', 'hotel', 'room_type'], right_on=['dow_','month_', 'hotel_', 
                                                                                                                  'room_type_'],
                     suffixes=('_act', '_tot'))

merged_df = merged_df.drop(['dow_','month_'],axis=1)
merged_df

In [None]:
merged_df['probability'] = merged_df['total_rns']/merged_df['total_rns_sum']

In [None]:
merged_df['expected_rns'] = merged_df['probability'] * merged_df['total_rns_median']

In [None]:
merged_df = merged_df.sort_values(by=['dow', 'month', 'adr'], ascending=[True, True, False])

In [None]:
# Cumulate expected room nights from the highest ADR downwards (rows were sorted by
# ADR descending within dow/month in the previous cell) to get the demand at or
# above each price. The running total is kept separate per hotel and room type:
# the probabilities are computed per (dow, month, hotel, room_type) and the demand
# curves are fitted on those same subsets, so the segments must not be mixed.
merged_df['expected_demand']=merged_df.groupby(['dow', 'month', 'hotel', 'room_type'])['expected_rns'].cumsum()

In [None]:
merged_df['expected_rev'] = merged_df['adr']* merged_df['expected_demand']

In [None]:
merged_df[(merged_df.dow == 'Friday') & (merged_df.month =='April')].plot(x='adr', y='expected_demand', kind='line')

In [None]:
merged_df[(merged_df.dow == 'Friday') & (merged_df.month =='April')].plot(x='adr', y='expected_rev', kind='line')

In [None]:
merged_df

In [None]:
from scipy.optimize import curve_fit

# Define the demand curve function
def demand_curve(x, a, b, c, d, max_demand):
    """Exponential-decay demand model evaluated at price(s) ``x``.

    Computes ``a * exp(-b * x) + c``, limits that value to ``max_demand``
    wherever ``x <= max_demand``, and finally adds the offset ``d``.

    NOTE(review): the cap is applied where the *price* ``x`` is at most
    ``max_demand`` (a demand quantity) — confirm this comparison is intended.
    """
    uncapped = a * np.exp(-b * x) + c
    in_cap_region = x <= max_demand
    # Inside the cap region take the smaller of the model value and max_demand.
    capped = np.where(in_cap_region, np.minimum(uncapped, max_demand), uncapped)
    return capped + d


In [None]:
x_data = merged_df['adr'].values
y_data = merged_df['expected_demand'].values


initial_guess = [1, 0.01, 1, 1, 100]
bounds = ([0, 0, 0, 0, 0], [np.inf, np.inf, np.inf, np.inf, np.inf])

params, _ = curve_fit(demand_curve, x_data, y_data, bounds=bounds, p0=initial_guess)

a_fit, b_fit, c_fit ,d_fit,max_demand= params

In [None]:
predicted_demand = demand_curve(x_data, a_fit, b_fit,c_fit,d_fit,max_demand)

In [None]:
plt.scatter(x_data, y_data, label='Actual Demand')
plt.plot(x_data, predicted_demand, label='Fitted Curve')
plt.xlabel('Price')
plt.ylabel('Demand')
plt.legend()
plt.title('Demand Curve Fit')
plt.show()

In [None]:
def revenue(price):
    """Return revenue at ``price``: the price times the demand given by ``demand_curve``.

    Reads the fitted parameters ``a_fit``, ``b_fit``, ``c_fit``, ``d_fit`` and
    ``max_demand`` from the enclosing (notebook-level) scope.
    """
    return price * demand_curve(price, a_fit, b_fit,c_fit,d_fit,max_demand)

In [None]:
objective = lambda price: -revenue(price)
from scipy.optimize import minimize_scalar

result = minimize_scalar(objective, bounds=(60, 180), method='bounded')
optimal_price = result.x
max_revenue = -result.fun
room_sold = demand_curve(optimal_price, a_fit, b_fit,c_fit,d_fit,max_demand)

In [None]:
print(f"The optimal price to maximize revenue: ${optimal_price}")
print(f"The maximum revenue achievable: ${max_revenue}")
print(f"The expected number of rooms to sell: {room_sold}")

In [None]:
from scipy.optimize import brentq

def demand_to_price(num_rooms, a, b, c, d, max_demand, price_range=(0, 200)):
    """Find the price at which the exponential demand term equals ``num_rooms``.

    Solves ``a * exp(-b * price) + c == num_rooms`` for ``price`` with Brent's
    method inside ``price_range``.

    Parameters
    ----------
    num_rooms : target number of rooms (demand level).
    a, b, c : coefficients of the exponential demand term.
    d, max_demand : accepted so the fitted parameter tuple can be passed through
        unchanged; they are not used in the inversion.
        NOTE(review): ``demand_curve`` also adds ``d`` and applies a cap, so this
        is not an exact inverse of it — confirm that is intended.
    price_range : ``(low, high)`` search interval for the price.

    Returns
    -------
    float
        The root when one exists inside ``price_range``. When there is none the
        result is deterministic (the previous fallback drew a random price, so
        results changed between runs): ``low`` when the target exceeds the
        modelled demand at both ends of the interval, ``high`` when the
        modelled demand exceeds the target at both ends, and NaN when the
        demand term cannot be evaluated.
    """
    def root_func(x):
        return num_rooms - (a * np.exp(-b * x) + c)

    low, high = price_range
    try:
        return brentq(root_func, low, high)
    except ValueError:
        # brentq needs a sign change over the interval; without one the target is
        # either never reached or always exceeded, so clamp to the matching bound.
        gap_at_low = root_func(low)
        if np.isnan(gap_at_low):
            return float('nan')
        return float(low) if gap_at_low > 0 else float(high)

# from scipy.optimize import brentq

# def demand_to_price(num_rooms, a, b, c, d, max_demand):
   
#     def root_func(x):
#         return num_rooms - (a * np.exp(-b * x) + c)
    
#     # Find the price using numerical root finding
#     price = brentq(root_func, 0, 200)  # Adjust the interval bounds as needed
    
#     return price

In [None]:
demand_to_price(50,a_fit,b_fit,c_fit,d_fit,max_demand)

In [None]:
results = pd.DataFrame(columns=['month', 'hotel','room_limit', 'room_type', 'dow', 'optimal_rate', 'expected_rn','expected_rev','optimal_rate_lim_inv'])

In [None]:
months = merged_df.month.unique()
dow = merged_df.dow.unique()

In [None]:
merged_df

In [None]:
# Fit one demand curve per (hotel, room type, month, day of week) and derive the
# revenue-maximising rate. Rows are collected in a list and `results` is rebuilt
# from scratch at the end, so re-running this cell does not append duplicate rows.
result_columns = ['month', 'hotel','room_limit', 'room_type', 'dow', 'optimal_rate', 'expected_rn','expected_rev','optimal_rate_lim_inv']
result_rows = []

for hotel in hotels:
    for room_type in room_types:
        for month in months:
            for day in dow:
                # Get data for the specific combination
                data_subset = merged_df[(merged_df['dow'] == day) & 
                                        (merged_df['hotel'] == hotel) & 
                                        (merged_df['room_type'] == room_type) & 
                                        (merged_df['month'] == month)].reset_index()

                if data_subset.empty:
                    continue

                # Remove outliers: drop ADRs more than 2 standard deviations from the mean
                mean = data_subset['adr'].mean()
                std_dev = data_subset['adr'].std()
                data_subset['z_scores'] = np.abs((data_subset['adr'] - mean) / std_dev)
                data_subset = data_subset[data_subset['z_scores'] <= 2]

                # A single-row subset has an undefined (NaN) standard deviation, so its
                # only row fails the z-score test and nothing is left to fit.
                if data_subset.empty:
                    print(f"Skipping {hotel}, {room_type}, {month}, {day}: no rows left after outlier removal")
                    continue

                # Fit demand curve
                x_data = data_subset['adr'].values
                y_data = data_subset['expected_demand'].values

                try:
                    initial_guess = [1, 0.01, 1, 1, data_subset['total_rns_median'].values[0]]
                    bounds = ([0, 0, 0, 0, 0], [np.inf, np.inf, np.inf, np.inf, np.inf])
                    maxfev = 10000  # Increase the number of maximum function evaluations
                    params, _ = curve_fit(demand_curve, x_data, y_data, bounds=bounds, p0=initial_guess, maxfev=maxfev)
                except (RuntimeError, ValueError) as e:
                    # RuntimeError: the fit did not converge; ValueError: unusable input data.
                    print(f"Error fitting demand curve for {hotel}, {room_type}, {month}, {day}: {e}")
                    continue

                a_fit, b_fit, c_fit, d_fit, max_demand = params

                # Optimize revenue (price * fitted demand) within the allowed price band
                objective = lambda price: -(price * demand_curve(price, a_fit, b_fit, c_fit, d_fit, max_demand))
                optimize = minimize_scalar(objective, bounds=(45, 200), method='bounded')
                optimal_price = optimize.x
                max_revenue = -optimize.fun
                expected_rns = demand_curve(optimal_price, a_fit, b_fit, c_fit, d_fit, max_demand)

                # Price at which the fitted demand equals the room inventory
                optimal_rate_lim_inv = demand_to_price(data_subset['room_limit'].mean(), a_fit, b_fit, c_fit, d_fit, max_demand)

                # Cast to plain floats so the columns get a numeric dtype
                result_rows.append({'hotel': hotel,
                                    'room_type': room_type,
                                    'room_limit': data_subset['room_limit'].mean(),
                                    'month': month,
                                    'dow': day,
                                    'optimal_rate': float(optimal_price),
                                    'expected_rev': float(max_revenue),
                                    'expected_rn': float(expected_rns),
                                    'optimal_rate_lim_inv': float(optimal_rate_lim_inv)})

results = pd.DataFrame(result_rows, columns=result_columns)


In [None]:
results

In [None]:
#clean up the results
results['optimal_rate'] = results['optimal_rate'].round()
results['optimal_rate_lim_inv'] = results['optimal_rate_lim_inv'].round()

results['expected_rn'] = results['expected_rn'].round().astype(int)
results['expected_rev'] = results['expected_rev'].round()
results

In [None]:
results

In [None]:
results[(results['month'] == 'April') & (results['room_type'] == 'A')]

In [None]:
import itertools

hotel_types = ['Resort Hotel', 'City Hotel']
room_types = ['A', 'D', 'E']

combinations = list(itertools.product(hotel_types, room_types))

combinations

combinations_df = pd.DataFrame(combinations, columns=['hotel', 'room_type'])
combinations_df

In [None]:
# Month name -> month number (1-12), built from the stdlib calendar module
# (imported at the top of the notebook) instead of a pandas frequency alias.
month_dict = {calendar.month_name[number]: number for number in range(1, 13)}

start_date = pd.Timestamp('2022-05-01')
end_date = pd.Timestamp('2024-06-30')

# One row per calendar day in [start_date, end_date], built in a single call
# rather than month by month, so no notebook-level names (`df`, `month`) are
# overwritten by loop variables.
stay_dates = pd.DataFrame({'arrival_date': pd.date_range(start_date, end_date, freq='D')})
stay_dates['dow'] = stay_dates['arrival_date'].dt.day_name()
stay_dates['month'] = stay_dates['arrival_date'].dt.month_name()

# Cross join every date with every (hotel, room_type) combination via a constant key.
new_data = stay_dates.assign(key=1).merge(combinations_df.assign(key=1), on='key').drop('key', axis=1)

new_data

In [None]:
final_data = pd.merge(new_data, results, how='left', on=['dow', 'hotel', 'room_type', 'month'])
final_data

In [None]:
final_data['arrival_date'] = pd.to_datetime(final_data['arrival_date'])

In [None]:
final_data

In [None]:
final_data[final_data["optimal_rate_lim_inv"] > final_data["optimal_rate"]]

In [None]:
final_data

In [None]:
4110/4752

In [None]:
final_data[final_data["expected_rn"] > final_data["room_limit"]]

In [None]:
4417/4752

In [None]:
final_data.isna().sum()

In [None]:
final_data = final_data.dropna(axis=0)

In [None]:
final_data

In [None]:
data_backup['arrival_date_transformed'] = pd.to_datetime(data_backup['arrival_date_transformed'])
final_data['arrival_date'] = pd.to_datetime(final_data['arrival_date'])

In [None]:
rev_opt_booking = pd.merge(data_backup, final_data, right_on=['arrival_date', 'hotel', 'room_type'], left_on=['arrival_date_transformed', 'hotel', 'reserved_room_type'], how='left')
rev_opt_booking.to_csv('rev_opt_booking.csv', index=False)

In [None]:
rev_opt_booking

In [None]:
rev_opt_booking.drop(['month_y', 'room_limit_y'], axis=1, inplace=True)

In [None]:
rev_opt_booking.rename(columns={"month_x": 'month', 'room_limit_x': 'room_limit'}, inplace=True)

In [None]:
# Make sure both date columns are datetimes. The first assignment targets
# "arrival_date" itself; writing to "arrival_data" would add a stray column
# and leave "arrival_date" unconverted.
rev_opt_booking["arrival_date"] = pd.to_datetime(rev_opt_booking["arrival_date"])
rev_opt_booking["arrival_date_transformed"] = pd.to_datetime(rev_opt_booking["arrival_date_transformed"])

In [None]:
rev_opt_booking.columns = map(lambda x: str(x).lower(), rev_opt_booking.columns)


In [None]:
# Rename columns to the names used in the output table.
# NOTE(review): 'transcation_ids' does not appear in the column listing printed
# earlier in this notebook — confirm the source column name, otherwise no
# 'booking_id' column is produced by this rename.
rev_opt_booking.rename(columns={'arrival_date_year':'year',
                                'optimal_rate' : 'optimal_adr', 'optimal_rate_lim_inv': 'optimal_adr_limited',
                                'transcation_ids': 'booking_id', 'reservation_status_date_transformed': 'reservation_date'
                               }, inplace=True)

In [None]:
rev_opt_booking.info()

In [None]:
rev_opt_booking['room_limit'] = rev_opt_booking.apply(update_values, axis=1)

In [None]:
rev_opt_booking = rev_opt_booking.drop([ 'reservation_status_date',  'avg_rooms_per_night','reserved_room_type', 'tally_days', 'total_room_nights'], axis=1)

In [None]:
rev_opt_booking.columns

In [None]:
rev_opt_booking['adr'] = rev_opt_booking['adr'].astype(int)

In [None]:
# Select and order the columns written to the output table.
# NOTE(review): 'cust_id' and 'booking_id' are not among the columns printed earlier
# in this notebook; if they are absent this selection raises a KeyError — confirm
# that the source table provides them.
rev_opt_booking = rev_opt_booking[['arrival_date', 'hotel', 'adr', 'is_canceled', 'lead_time',  'babies', 'children', 'country', 'customer_type',  'cust_id', 'days_in_waiting_list', 'deposit_type', 'distribution_channel', 'dow', 'expected_rev','expected_rn', 'market_segment', 'meal', 'month', 'optimal_adr',  'optimal_adr_limited', 'previous_bookings_not_canceled', 'previous_cancellations', 'reservation_status','reservation_date', 'room_type', 'room_limit', 'stays_in_weekend_nights', 'stays_in_week_nights',  'total_guests', 'total_stay_nights', 'booking_id']]

In [None]:
rev_opt_booking

In [None]:
rev_opt_booking["reservation_date"] = pd.to_datetime(rev_opt_booking["reservation_date"]).dt.strftime("%Y-%m-%d %H:%M:%S")
rev_opt_booking["arrival_date"] = pd.to_datetime(rev_opt_booking["arrival_date"]).dt.strftime("%Y-%m-%d %H:%M:%S")

rev_opt_booking["reservation_date"]

In [None]:
rev_opt_booking["arrival_date"]

In [None]:
rev_opt_booking.isna().sum()

In [None]:
rev_opt_booking = rev_opt_booking.dropna(axis=0)

In [None]:
rev_opt_booking.isna().sum()

In [None]:
rev_opt_booking.shape

In [None]:
df_model=session.createDataFrame(
        rev_opt_booking.values.tolist(),
        schema=rev_opt_booking.columns.tolist())
df_model.write.mode("overwrite").save_as_table("TTH_DB.TTH_REV_OPT_Schema.REV_OPT_OUTPUT")

# Booking results

In [None]:
# Reload the raw bookings through the Snowpark session created at the top of the
# notebook. The first load cell reads the table this way, and the legacy
# get_connection / get_dataframe calls are commented out there.
data = my_session.sql("select * from BOOKINGS_TRANSFORMED").to_pandas()
data

In [None]:
data.columns = map(lambda x: str(x).lower(), data.columns)

In [None]:
# Expand every booking into one row per stay date, keeping the booking attributes.
# `data` was just reloaded from the raw table, which has no 'total_rns' or
# 'room_limit' columns (both were derived earlier in this notebook), so they are
# derived again here when missing.
bookings_c1 = data.copy()
if 'total_rns' not in bookings_c1.columns:
    bookings_c1['total_rns'] = bookings_c1['stays_in_week_nights'] + bookings_c1['stays_in_weekend_nights']
if 'room_limit' not in bookings_c1.columns:
    bookings_c1['room_limit'] = bookings_c1.apply(update_values, axis=1)

# Frames are collected in a list and concatenated once; calling pd.concat inside
# the loop re-copies all previously expanded rows on every iteration.
stay_date_frames_c1 = []

for _, row in bookings_c1.iterrows():
    num_stay_dates = row['total_rns']
    try:
        # Create a row for each stay date
        expanded_bookingc1 = pd.DataFrame({
            'hotel': row['hotel'],
            'room_type': row['reserved_room_type'], 
            'market_segment': row['market_segment'],
            'deposit_type':row['deposit_type'],
            'meal': row['meal'],
            'distribution_channel': row['distribution_channel'],
            'customer_type': row['customer_type'],
            'arrival_date': pd.date_range(start=row['arrival_date_transformed'], periods=num_stay_dates),
            'total_rns': 1,
            'adr': row['adr'],
            'room_limit': row['room_limit']
        })
        stay_date_frames_c1.append(expanded_bookingc1)
    except ValueError as e:
        print(f"Error processing booking for {row['hotel']} on {row['arrival_date_transformed']} : {num_stay_dates} {e}")

# pd.concat rejects an empty list, so fall back to an empty frame when nothing was expanded.
expanded_dfc1 = pd.concat(stay_date_frames_c1, ignore_index=True) if stay_date_frames_c1 else pd.DataFrame()

In [None]:
expanded_dfc1 = expanded_dfc1.sort_values('arrival_date')
expanded_dfc1 = expanded_dfc1.reset_index(drop=True)
expanded_dfc1

In [None]:
expanded_dfc1['adr']= np.round(expanded_dfc1['adr'], 2)

expanded_dfc1


In [None]:
# day_name()/month_name() return English names by default, whereas strftime('%A'/'%B')
# follows the process locale; this keeps the labels identical across environments.
expanded_dfc1['dow'] = expanded_dfc1['arrival_date'].dt.day_name()
expanded_dfc1['month'] = expanded_dfc1['arrival_date'].dt.month_name()

In [None]:
booking_frequency = expanded_dfc1.groupby(['hotel','room_type','market_segment','deposit_type','meal',
                                        'distribution_channel','customer_type',
                                           'arrival_date'], as_index=False, sort= True).agg({'adr': 'mean','room_limit': 'mean',
                                                                'total_rns': 'sum'})

In [None]:
booking_frequency = booking_frequency.sort_values(by=['arrival_date','hotel','room_type'], ascending=[True, True, True])
booking_frequency.info()

In [None]:
booking_frequency["arrival_date"] = pd.to_datetime(booking_frequency["arrival_date"]).dt.strftime("%Y-%m-%d %H:%M:%S")


In [None]:
booking_frequency

In [None]:
df_inter=session.createDataFrame(
        booking_frequency.values.tolist(),
        schema=booking_frequency.columns.tolist())
df_inter.write.mode("overwrite").save_as_table("TTH_DB.TTH_REV_OPT_Schema.booking_frequency")

In [None]:
session.table(["TTH_DB.TTH_REV_OPT_Schema.booking_frequency"]).to_pandas()

In [None]:
booking_frequency.to_csv('./booking_frequency.csv', index=False)

In [None]:
booking_frequency