In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt

In [2]:
#Read the ods File
path = "../Raw Data/mar20.ods"
df = pd.read_excel(path, engine="odf")
df.head()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,1113,7.18%,3208,3570844,11498239,18723562
1,TheGrandHotel,Taipei,0,0,,0,0,0,0
2,台北華國⼤飯店,Taipei,326,1608,15.91%,2651,4263218,6216833,11658045
3,ImperialHotelTaipei,Taipei,0,0,,0,0,0,0
4,華泰王⼦⼤飯店,Taipei,220,354,5.19%,2448,866439,4955978,7234985


In [3]:
#Clean Data 
df = pd.DataFrame(df, columns=['Hotel','Area','Room','Demand','Occupancy','ADR','Room_Revenue','F&B_Revenue','Total_Revenue'])
df.head()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,1113,7.18%,3208,3570844,11498239,18723562
1,TheGrandHotel,Taipei,0,0,,0,0,0,0
2,台北華國⼤飯店,Taipei,326,1608,15.91%,2651,4263218,6216833,11658045
3,ImperialHotelTaipei,Taipei,0,0,,0,0,0,0
4,華泰王⼦⼤飯店,Taipei,220,354,5.19%,2448,866439,4955978,7234985


In [4]:
df = df.dropna()
df

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,1113,7.18%,3208,3570844,11498239,18723562
2,台北華國⼤飯店,Taipei,326,1608,15.91%,2651,4263218,6216833,11658045
4,華泰王⼦⼤飯店,Taipei,220,354,5.19%,2448,866439,4955978,7234985
6,豪景⼤酒店,Taipei,209,265,4.09%,1622,429934,440320,1115432
8,國王⼤飯店,Taipei,97,181,6.02%,1986,359425,183190,587874
...,...,...,...,...,...,...,...,...,...
240,台東桂⽥喜來登酒店,Others,278,1844,21.40%,2517,4641601,5348980,10916066
242,⻑榮桂冠酒店(基隆),Others,140,482,11.11%,2612,1258993,788281,3566160
244,鈺通⼤飯店,Others,120,235,6.32%,2022,475066,2586824,3078328
246,寶華⼤飯店,Others,78,78,3.23%,1120,87392,0,89392


In [5]:
df.loc[df['Hotel']=='華泰王⼦⼤飯店']

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
4,華泰王⼦⼤飯店,Taipei,220,354,5.19%,2448,866439,4955978,7234985


In [6]:
print(df.dtypes)

Hotel            object
Area             object
Room              int64
Demand            int64
Occupancy        object
ADR               int64
Room_Revenue      int64
F&B_Revenue       int64
Total_Revenue     int64
dtype: object


df['Demand'] = df['Demand'].replace(',', '', regex=True)
df['ADR'] = df['ADR'].replace(',', '', regex=True)
df['Room_Revenue'] = df['Room_Revenue'].replace(',', '', regex=True)
df['F&B_Revenue'] = df['F&B_Revenue'].replace(',', '', regex=True)
df['Total_Revenue'] = df['Total_Revenue'].replace(',', '', regex=True)

In [7]:
df['Occupancy'] = df['Occupancy'].str.rstrip('%').astype('float') / 100.0

In [8]:
df[df['Occupancy'].isnull()]

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue


In [9]:
df.astype({'Room': 'int64',
          'Demand': 'int64',
          'Occupancy': 'int64',
          'ADR': 'float64',
          'Room_Revenue': 'int64',
          'F&B_Revenue': 'int64',
          'Total_Revenue': 'int64',
          }).dtypes

Hotel             object
Area              object
Room               int64
Demand             int64
Occupancy          int64
ADR              float64
Room_Revenue       int64
F&B_Revenue        int64
Total_Revenue      int64
dtype: object

In [10]:
Day = 31

In [11]:
#Calculate/Add RevPAR and Demand
df['RevPAR'] = df['Occupancy']*df['ADR']
df['Supply'] = df['Room']*Day
df

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply
0,圓⼭⼤飯店,Taipei,500,1113,0.0718,3208,3570844,11498239,18723562,230.3344,15500
2,台北華國⼤飯店,Taipei,326,1608,0.1591,2651,4263218,6216833,11658045,421.7741,10106
4,華泰王⼦⼤飯店,Taipei,220,354,0.0519,2448,866439,4955978,7234985,127.0512,6820
6,豪景⼤酒店,Taipei,209,265,0.0409,1622,429934,440320,1115432,66.3398,6479
8,國王⼤飯店,Taipei,97,181,0.0602,1986,359425,183190,587874,119.5572,3007
...,...,...,...,...,...,...,...,...,...,...,...
240,台東桂⽥喜來登酒店,Others,278,1844,0.2140,2517,4641601,5348980,10916066,538.6380,8618
242,⻑榮桂冠酒店(基隆),Others,140,482,0.1111,2612,1258993,788281,3566160,290.1932,4340
244,鈺通⼤飯店,Others,120,235,0.0632,2022,475066,2586824,3078328,127.7904,3720
246,寶華⼤飯店,Others,78,78,0.0323,1120,87392,0,89392,36.1760,2418


In [12]:
#Calculate Monthly Room/Supply/Demand/Room Revenue/F&B Revenue/Total Revenue
date = "2020-03-31"
room = round(df["Room"].sum(),0)
supply = round(df["Supply"].sum(),0)
demand = round(df['Demand'].sum(),0)
room_revenue = df["Room_Revenue"].sum()
fb_revenue = df["F&B_Revenue"].sum()
total_revenue = df["Total_Revenue"].sum()

In [13]:
#Calculate Monthly Occupancy/ADR/RevPAR
occ = round((demand/supply),2)
adr = round((room_revenue/demand), 2)
#revpar = round((occ*adr), 2)
revpar = round((room_revenue/supply), 2)

In [14]:
#Append "Total" Row
df = df.append([{'Hotel': 'Total',
                'Area': 'Total',
                'Room': room,
                'Supply': supply,
                'Occupancy': occ,
                'ADR': adr,
                'Room_Revenue': room_revenue,
                'F&B_Revenue': fb_revenue,
                'Total_Revenue': total_revenue,
                'RevPAR': revpar,
                'Demand': demand}], ignore_index=True)
df.tail()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply
121,⻑榮桂冠酒店(基隆),Others,140,482,0.1111,2612.0,1258993,788281,3566160,290.1932,4340
122,鈺通⼤飯店,Others,120,235,0.0632,2022.0,475066,2586824,3078328,127.7904,3720
123,寶華⼤飯店,Others,78,78,0.0323,1120.0,87392,0,89392,36.176,2418
124,陸島酒店,Others,47,0,0.0,0.0,39309,3040,42349,0.0,1457
125,Total,Total,28595,157191,0.18,3343.81,525616054,893183278,1698587213,592.95,886445


In [15]:
#Add "Date" Column 
df['Date'] = date
df.tail()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply,Date
121,⻑榮桂冠酒店(基隆),Others,140,482,0.1111,2612.0,1258993,788281,3566160,290.1932,4340,2020-03-31
122,鈺通⼤飯店,Others,120,235,0.0632,2022.0,475066,2586824,3078328,127.7904,3720,2020-03-31
123,寶華⼤飯店,Others,78,78,0.0323,1120.0,87392,0,89392,36.176,2418,2020-03-31
124,陸島酒店,Others,47,0,0.0,0.0,39309,3040,42349,0.0,1457,2020-03-31
125,Total,Total,28595,157191,0.18,3343.81,525616054,893183278,1698587213,592.95,886445,2020-03-31


In [16]:
df['Date'] = pd.to_datetime(df['Date']).dt.to_period('D')

In [17]:
#Format 'Summary'
df['Date'] = df['Date'].dt.to_timestamp()
#df = df.style.format({'occ':'{:.0%}',
                    #'demand':'{:.0f}',
                    #'adr':'{:.2f}',
                    #'revpar':'{:.2f}'})

In [18]:
print(df.dtypes)

Hotel                    object
Area                     object
Room                      int64
Demand                    int64
Occupancy               float64
ADR                     float64
Room_Revenue              int64
F&B_Revenue               int64
Total_Revenue             int64
RevPAR                  float64
Supply                    int64
Date             datetime64[ns]
dtype: object


#Create DataFrame 'Summary'
summary = pd.DataFrame({
    'date':date,
    'room':room,
    'supply':supply,
    'demand':demand,
    'room_revenue': room_revenue, 
    'fb_revenue': fb_revenue,
    'total_revenue': total_revenue,
    'occ': occ,
    'adr': adr,
    'revpar':revpar   
}, index=[0])
summary

#Format Summary
summary['date'] = pd.to_datetime(summary['date']).dt.to_period('M')
sep20 = summary.style.format({'occ':'{:.0%}',
                    'demand':'{:.0f}',
                    'adr':'{:.2f}',
                    'revpar':'{:.2f}'})

sep20

df.to_excel(r'static\data\2020\sep20.xlsx', index = False, encoding='utf-8-sig')

In [19]:
df.to_csv(r'..\static\data\2020\mar20.csv', index = False, encoding='utf-8-sig')

In [20]:
from sqlalchemy import create_engine

In [21]:
connection_string = "postgres:postgres@localhost:5432/hotel_db"
engine = create_engine(f'postgresql://{connection_string}', encoding = 'utf-8')

In [22]:
engine.table_names()

['jan20',
 'feb20',
 'mar20',
 'apr20',
 'may20',
 'jun20',
 'jul20',
 'aug20',
 'sep20']

In [23]:
df.to_sql(name='mar20', con=engine, if_exists='append', index=False)