In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt

In [2]:
#Read the ods File
path = "../Raw Data/apr20.ods"
df = pd.read_excel(path, engine="odf")
df.head()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,632,4.21%,2857,1805413,8714140,14081540
1,TheGrandHotel,Taipei,0,0,,0,0,0,0
2,台北華國⼤飯店,Taipei,326,596,6.09%,1883,1122468,6552844,8457149
3,ImperialHotelTaipei,Taipei,0,0,,0,0,0,0
4,華泰王⼦⼤飯店,Taipei,220,410,6.21%,1105,453249,5349049,7206770


In [3]:
#Clean Data 
df = pd.DataFrame(df, columns=['Hotel','Area','Room','Demand','Occupancy','ADR','Room_Revenue','F&B_Revenue','Total_Revenue'])
df.head()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,632,4.21%,2857,1805413,8714140,14081540
1,TheGrandHotel,Taipei,0,0,,0,0,0,0
2,台北華國⼤飯店,Taipei,326,596,6.09%,1883,1122468,6552844,8457149
3,ImperialHotelTaipei,Taipei,0,0,,0,0,0,0
4,華泰王⼦⼤飯店,Taipei,220,410,6.21%,1105,453249,5349049,7206770


In [4]:
df = df.dropna()
df

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,632,4.21%,2857,1805413,8714140,14081540
2,台北華國⼤飯店,Taipei,326,596,6.09%,1883,1122468,6552844,8457149
4,華泰王⼦⼤飯店,Taipei,220,410,6.21%,1105,453249,5349049,7206770
6,豪景⼤酒店,Taipei,209,54,0.86%,766,41350,0,286350
8,國王⼤飯店,Taipei,97,8,0.27%,3139,25113,102790,172483
...,...,...,...,...,...,...,...,...,...
240,台東桂⽥喜來登酒店,Others,278,2181,26.15%,2564,5591255,3854929,10360768
242,⻑榮桂冠酒店(基隆),Others,140,543,12.93%,2348,1275059,907045,3186952
244,鈺通⼤飯店,Others,120,216,6.00%,2151,464705,1181005,1677660
246,寶華⼤飯店,Others,78,101,4.32%,1374,138798,1560,142358


In [5]:
df.loc[df['Hotel']=='華泰王⼦⼤飯店']

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
4,華泰王⼦⼤飯店,Taipei,220,410,6.21%,1105,453249,5349049,7206770


In [6]:
print(df.dtypes)

Hotel            object
Area             object
Room              int64
Demand            int64
Occupancy        object
ADR               int64
Room_Revenue      int64
F&B_Revenue       int64
Total_Revenue     int64
dtype: object


df['Demand'] = df['Demand'].replace(',', '', regex=True)
df['ADR'] = df['ADR'].replace(',', '', regex=True)
df['Room_Revenue'] = df['Room_Revenue'].replace(',', '', regex=True)
df['F&B_Revenue'] = df['F&B_Revenue'].replace(',', '', regex=True)
df['Total_Revenue'] = df['Total_Revenue'].replace(',', '', regex=True)

In [7]:
df['Occupancy'] = df['Occupancy'].str.rstrip('%').astype('float') / 100.0

In [8]:
df[df['Occupancy'].isnull()]

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue


In [9]:
df.astype({'Room': 'int64',
          'Demand': 'int64',
          'Occupancy': 'int64',
          'ADR': 'float64',
          'Room_Revenue': 'int64',
          'F&B_Revenue': 'int64',
          'Total_Revenue': 'int64',
          }).dtypes

Hotel             object
Area              object
Room               int64
Demand             int64
Occupancy          int64
ADR              float64
Room_Revenue       int64
F&B_Revenue        int64
Total_Revenue      int64
dtype: object

In [10]:
Day = 30

In [11]:
#Calculate/Add RevPAR and Demand
df['RevPAR'] = df['Occupancy']*df['ADR']
df['Supply'] = df['Room']*Day
df

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply
0,圓⼭⼤飯店,Taipei,500,632,0.0421,2857,1805413,8714140,14081540,120.2797,15000
2,台北華國⼤飯店,Taipei,326,596,0.0609,1883,1122468,6552844,8457149,114.6747,9780
4,華泰王⼦⼤飯店,Taipei,220,410,0.0621,1105,453249,5349049,7206770,68.6205,6600
6,豪景⼤酒店,Taipei,209,54,0.0086,766,41350,0,286350,6.5876,6270
8,國王⼤飯店,Taipei,97,8,0.0027,3139,25113,102790,172483,8.4753,2910
...,...,...,...,...,...,...,...,...,...,...,...
240,台東桂⽥喜來登酒店,Others,278,2181,0.2615,2564,5591255,3854929,10360768,670.4860,8340
242,⻑榮桂冠酒店(基隆),Others,140,543,0.1293,2348,1275059,907045,3186952,303.5964,4200
244,鈺通⼤飯店,Others,120,216,0.0600,2151,464705,1181005,1677660,129.0600,3600
246,寶華⼤飯店,Others,78,101,0.0432,1374,138798,1560,142358,59.3568,2340


In [12]:
#Calculate Monthly Room/Supply/Demand/Room Revenue/F&B Revenue/Total Revenue
date = "2020-04-30"
room = round(df["Room"].sum(),0)
supply = round(df["Supply"].sum(),0)
demand = round(df['Demand'].sum(),0)
room_revenue = df["Room_Revenue"].sum()
fb_revenue = df["F&B_Revenue"].sum()
total_revenue = df["Total_Revenue"].sum()

In [13]:
#Calculate Monthly Occupancy/ADR/RevPAR
occ = round((demand/supply),2)
adr = round((room_revenue/demand), 2)
#revpar = round((occ*adr), 2)
revpar = round((room_revenue/supply), 2)

In [14]:
#Append "Total" Row
df = df.append([{'Hotel': 'Total',
                'Area': 'Total',
                'Room': room,
                'Supply': supply,
                'Occupancy': occ,
                'ADR': adr,
                'Room_Revenue': room_revenue,
                'F&B_Revenue': fb_revenue,
                'Total_Revenue': total_revenue,
                'RevPAR': revpar,
                'Demand': demand}], ignore_index=True)
df.tail()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply
121,⻑榮桂冠酒店(基隆),Others,140,543,0.1293,2348.0,1275059,907045,3186952,303.5964,4200
122,鈺通⼤飯店,Others,120,216,0.06,2151.0,464705,1181005,1677660,129.06,3600
123,寶華⼤飯店,Others,78,101,0.0432,1374.0,138798,1560,142358,59.3568,2340
124,陸島酒店,Others,47,26,0.0184,1426.0,37071,2880,41701,26.2384,1410
125,Total,Total,28594,129777,0.15,3282.54,425998598,730926016,1399631627,496.61,857820


In [15]:
#Add "Date" Column 
df['Date'] = date
df.tail()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply,Date
121,⻑榮桂冠酒店(基隆),Others,140,543,0.1293,2348.0,1275059,907045,3186952,303.5964,4200,2020-04-30
122,鈺通⼤飯店,Others,120,216,0.06,2151.0,464705,1181005,1677660,129.06,3600,2020-04-30
123,寶華⼤飯店,Others,78,101,0.0432,1374.0,138798,1560,142358,59.3568,2340,2020-04-30
124,陸島酒店,Others,47,26,0.0184,1426.0,37071,2880,41701,26.2384,1410,2020-04-30
125,Total,Total,28594,129777,0.15,3282.54,425998598,730926016,1399631627,496.61,857820,2020-04-30


In [16]:
df['Date'] = pd.to_datetime(df['Date']).dt.to_period('D')

In [17]:
#Format 'Summary'
df['Date'] = df['Date'].dt.to_timestamp()
#df = df.style.format({'occ':'{:.0%}',
                    #'demand':'{:.0f}',
                    #'adr':'{:.2f}',
                    #'revpar':'{:.2f}'})

In [18]:
print(df.dtypes)

Hotel                    object
Area                     object
Room                      int64
Demand                    int64
Occupancy               float64
ADR                     float64
Room_Revenue              int64
F&B_Revenue               int64
Total_Revenue             int64
RevPAR                  float64
Supply                    int64
Date             datetime64[ns]
dtype: object


#Create DataFrame 'Summary'
summary = pd.DataFrame({
    'date':date,
    'room':room,
    'supply':supply,
    'demand':demand,
    'room_revenue': room_revenue, 
    'fb_revenue': fb_revenue,
    'total_revenue': total_revenue,
    'occ': occ,
    'adr': adr,
    'revpar':revpar   
}, index=[0])
summary

#Format Summary
summary['date'] = pd.to_datetime(summary['date']).dt.to_period('M')
sep20 = summary.style.format({'occ':'{:.0%}',
                    'demand':'{:.0f}',
                    'adr':'{:.2f}',
                    'revpar':'{:.2f}'})

sep20

df.to_excel(r'static\data\2020\sep20.xlsx', index = False, encoding='utf-8-sig')

In [19]:
df.to_csv(r'..\static\data\2020\apr20.csv', index = False, encoding='utf-8-sig')

In [20]:
from sqlalchemy import create_engine

In [21]:
connection_string = "postgres:postgres@localhost:5432/hotel_db"
engine = create_engine(f'postgresql://{connection_string}', encoding = 'utf-8')

In [22]:
engine.table_names()

['jan20',
 'feb20',
 'mar20',
 'apr20',
 'may20',
 'jun20',
 'jul20',
 'aug20',
 'sep20']

In [23]:
df.to_sql(name='apr20', con=engine, if_exists='append', index=False)