# Analysing the Hotel Rates (2004–2018)

In [1]:
# Dependencies
import pandas as pd
import numpy as np

In [2]:
# Read the hotel rates csv into a dataframe and preview the first rows
hotelrates_df = pd.read_csv(
    "Data/hotel_dailyrates.csv",
    encoding="utf-8",
)
hotelrates_df.head()

Unnamed: 0,Average Daily Rate,Hotel Occupancy,Date
0,141.6038,0.738484,2004-07
1,143.1187,0.719571,2004-08
2,137.4212,0.750757,2004-09
3,141.5987,0.778466,2004-10
4,148.4632,0.682347,2004-11


## Converting average daily rates to monthly rates
- Create subsets for the months whose number of days is the same every year (30 or 31)
- Create a separate subset for February, which has 28 or 29 days depending on the year

### Regular months (30 and 31 days)

In [3]:
# Two-digit month codes, grouped by the number of days in the month
month_30 = [str(m).zfill(2) for m in (4, 6, 9, 11)]            # Apr, Jun, Sep, Nov
month_31 = [str(m).zfill(2) for m in (1, 3, 5, 7, 8, 10, 12)]  # Jan, Mar, May, Jul, Aug, Oct, Dec
month_28 = ["02"]                                              # Feb

# days[k] is the day count for the month codes in month_by_day[k]
days = [30, 31]
month_by_day = [month_30, month_31]

In [4]:
# Define a function that splits a dataframe into one subset per requested month
def months(df,list):
    """Return the rows of ``df`` for each month code, one dataframe per code.

    df   -- dataframe with a string "Date" column; a row's month is read from
            the last two characters of that column.
    list -- iterable of 2-character month codes (e.g. "04").

    Returns a list of dataframes in the same order as ``list``; each keeps the
    original row index of ``df``.
    """
    subsets = []
    for item in list:
        in_month = df["Date"].str[-2:] == item  # boolean mask for this month code
        subsets.append(df.loc[in_month])
    return subsets

In [5]:
# Split the data into the 30-day and 31-day month groups and scale daily rates to monthly rates
# month_grps[k] is a list of dataframes (one per month code) for month_by_day[k]
month_grps = [months(hotelrates_df, month_codes) for month_codes in month_by_day]

# Same nesting as month_grps, but holding only the "Average Daily Rate" column of each subset
avg_daily = [[subset["Average Daily Rate"] for subset in subsets] for subsets in month_grps]

# Monthly rate = average daily rate x number of days in the month
mo_rate_30 = [daily.mul(days[0]) for daily in avg_daily[0]]  # 30-day group
mo_rate_31 = [daily.mul(days[1]) for daily in avg_daily[1]]  # 31-day group

In [6]:
# Build one dataframe per day-count group (30, 31) with its monthly rates attached
# pd.concat stacks the per-month subsets; the monthly rates are aligned on the row index
df30 = pd.concat(month_grps[0]).assign(**{"Monthly Rate": pd.concat(mo_rate_30)})
df31 = pd.concat(month_grps[1]).assign(**{"Monthly Rate": pd.concat(mo_rate_31)})

df31.head()

Unnamed: 0,Average Daily Rate,Hotel Occupancy,Date,Monthly Rate
6,148.8121,0.7291,2005-01,4613.1751
18,160.7223,0.755878,2006-01,4982.3913
30,179.4341,0.727883,2007-01,5562.4571
42,184.5935,0.785091,2008-01,5722.3985
54,169.4801,0.742489,2009-01,5253.8831


### February: 29 days in a leap year, 28 days otherwise

In [7]:
# Pull the February rows out as a single dataframe
# months() returns one dataframe per month code; month_28 holds a single code, so take element 0
Feb = months(hotelrates_df, month_28)[0]

In [8]:
# Define a function that lists the leap years in a range of years
# Resource: https://stackoverflow.com/a/18951699/10205963
def leap(start,end):
    """Return the leap years from ``start`` to ``end`` (both inclusive), ascending.

    A year is a leap year when it is divisible by 400, or divisible by 4 but
    not by 100. Returns an empty list when ``start`` is greater than ``end``.
    """
    leap_years = []
    for year in range(start, end + 1):
        if year % 400 == 0:
            leap_years.append(year)
        elif year % 4 == 0 and year % 100 != 0:
            leap_years.append(year)
    return leap_years

In [9]:
# List the year of every February row as an integer
# date[:4] is the 4-character year prefix of each "Date" string
years = [int(date[:4]) for date in Feb["Date"]]

In [10]:
# Leap years between the first and last year in the list, stored as strings
# (years[0] is the first entry, years[-1] the last)
leap_yrs = list(map(str, leap(years[0], years[-1])))

In [11]:
# Create a subset for Feb data for leap years
# Compare the 4-character year prefix of "Date" against leap_yrs (a list of strings).
# A regex built with "|".join(leap_yrs) would be "" when leap_yrs is empty, and an
# empty pattern matches every row.
# .copy() makes the subset independent of Feb before a column is added to it.
leap_Feb = Feb.loc[Feb["Date"].str[:4].isin(leap_yrs)].copy()

# Calculate monthly rates for Feb leap years (29 days)
leap_Feb_mo_rate = (leap_Feb["Average Daily Rate"] * 29).tolist()

# Add the Monthly Rates as a new column
# Resource: https://stackoverflow.com/a/47694287/10205963

se = pd.Series(leap_Feb_mo_rate) # Convert the monthly rate list to a series
# se has a fresh 0..n-1 index, so pass its raw values to insert them by position
leap_Feb.insert(loc = 3, column = "Monthly Rate", value = se.values)

leap_Feb

Unnamed: 0,Average Daily Rate,Hotel Occupancy,Date,Monthly Rate
43,197.1489,0.811044,2008-02,5717.3181
91,213.3869,0.822019,2012-02,6188.2201
139,315.37,0.935458,2016-02,9145.73


In [12]:
# Create a subset for Feb data for regular years
# Keep the rows whose 4-character year prefix is NOT in leap_yrs (a list of strings).
# A regex built with "|".join(leap_yrs) would be "" when leap_yrs is empty; that
# pattern matches every row, so negating it would drop every February.
# .copy() makes the subset independent of Feb before a column is added to it.
reg_Feb = Feb.loc[~Feb["Date"].str[:4].isin(leap_yrs)].copy()

# Calculate monthly rates for Feb regular years (28 days)
reg_Feb_mo_rate = (reg_Feb["Average Daily Rate"] * 28).tolist()


# Add the Monthly Rates as a new column

se2 = pd.Series(reg_Feb_mo_rate) # Convert the monthly rate list to a series
# se2 has a fresh 0..n-1 index, so pass its raw values to insert them by position
reg_Feb.insert(loc = 3, column = "Monthly Rate", value = se2.values)

reg_Feb

Unnamed: 0,Average Daily Rate,Hotel Occupancy,Date,Monthly Rate
7,151.1379,0.740805,2005-02,4231.8612
19,161.3353,0.723245,2006-02,4517.3884
31,186.9618,0.791289,2007-02,5234.9304
55,157.9055,0.648616,2009-02,4421.354
67,150.5339,0.762755,2010-02,4214.9492
79,181.8734,0.837384,2011-02,5092.4552
103,201.8483,0.854944,2013-02,5651.7524
115,234.2935,0.898843,2014-02,6560.218
127,236.7917,0.883478,2015-02,6630.1676
151,292.0611,0.91451,2017-02,8177.7108


In [13]:
# Stack the leap-year and regular-year February rows into one dataframe
# (leap-year rows first, then regular years; original row labels are kept)
dfFeb = pd.concat([leap_Feb, reg_Feb], axis=0)
dfFeb

Unnamed: 0,Average Daily Rate,Hotel Occupancy,Date,Monthly Rate
43,197.1489,0.811044,2008-02,5717.3181
91,213.3869,0.822019,2012-02,6188.2201
139,315.37,0.935458,2016-02,9145.73
7,151.1379,0.740805,2005-02,4231.8612
19,161.3353,0.723245,2006-02,4517.3884
31,186.9618,0.791289,2007-02,5234.9304
55,157.9055,0.648616,2009-02,4421.354
67,150.5339,0.762755,2010-02,4214.9492
79,181.8734,0.837384,2011-02,5092.4552
103,201.8483,0.854944,2013-02,5651.7524


In [14]:
# Combine the 30-day, 31-day and February dataframes into a single dataframe
# Rows stay in group order (30-day months, 31-day months, February), not date order
hotelrates_df2 = pd.concat([df30, df31, dfFeb], axis=0)
hotelrates_df2.head()

Unnamed: 0,Average Daily Rate,Hotel Occupancy,Date,Monthly Rate
9,157.8543,0.740657,2005-04,4735.629
21,169.9458,0.770442,2006-04,5098.374
33,183.4274,0.764485,2007-04,5502.822
45,183.7442,0.768456,2008-04,5512.326
57,157.9462,0.743636,2009-04,4738.386


In [15]:
# Correct the monthly hotel rates based on the hotel occupancy rates
# (monthly rate x occupancy); the product of two columns is already a Series
se3 = hotelrates_df2["Monthly Rate"] * hotelrates_df2["Hotel Occupancy"]

# Plain column assignment keeps this cell re-runnable: DataFrame.insert raises
# ValueError when the column already exists, while assignment overwrites it.
# On the first run the new column is appended as the last column.
hotelrates_df2["Corrected Monthly Rate"] = se3.values

hotelrates_df2.head()

Unnamed: 0,Average Daily Rate,Hotel Occupancy,Date,Monthly Rate,Corrected Monthly Rate
9,157.8543,0.740657,2005-04,4735.629,3507.476839
21,169.9458,0.770442,2006-04,5098.374,3928.002226
33,183.4274,0.764485,2007-04,5502.822,4206.825554
45,183.7442,0.768456,2008-04,5512.326,4235.982601
57,157.9462,0.743636,2009-04,4738.386,3523.63218


### Calculate yearly hotel rates
- Assumes that the hotel rooms are occupied for 365 days