In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt

In [2]:
#Read the ods File
path = "../Raw Data/jun20.ods"
df = pd.read_excel(path, engine="odf")
df.head()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,4346,28.97%,2606,11324332,38593239,61459524
1,TheGrandHotel,Taipei,0,0,,0,0,0,0
2,台北華國⼤飯店,Taipei,326,1355,13.85%,1823,2469773,13413708,16882436
3,ImperialHotelTaipei,Taipei,0,0,,0,0,0,0
4,華泰王⼦⼤飯店,Taipei,220,1040,15.76%,1212,1260439,8707707,10688801


In [3]:
#Clean Data 
df = pd.DataFrame(df, columns=['Hotel','Area','Room','Demand','Occupancy','ADR','Room_Revenue','F&B_Revenue','Total_Revenue'])
df.head()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,4346,28.97%,2606,11324332,38593239,61459524
1,TheGrandHotel,Taipei,0,0,,0,0,0,0
2,台北華國⼤飯店,Taipei,326,1355,13.85%,1823,2469773,13413708,16882436
3,ImperialHotelTaipei,Taipei,0,0,,0,0,0,0
4,華泰王⼦⼤飯店,Taipei,220,1040,15.76%,1212,1260439,8707707,10688801


In [4]:
df = df.dropna()
df

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,4346,28.97%,2606,11324332,38593239,61459524
2,台北華國⼤飯店,Taipei,326,1355,13.85%,1823,2469773,13413708,16882436
4,華泰王⼦⼤飯店,Taipei,220,1040,15.76%,1212,1260439,8707707,10688801
6,豪景⼤酒店,Taipei,209,19,0.30%,2140,40654,0,327658
8,國王⼤飯店,Taipei,97,20,0.69%,2483,49661,144303,240392
...,...,...,...,...,...,...,...,...,...
240,台東桂⽥喜來登酒店,Others,278,4919,58.98%,2872,14129274,11694762,27982495
242,⻑榮桂冠酒店(基隆),Others,140,2241,53.36%,2594,5814092,3500771,11010222
244,鈺通⼤飯店,Others,120,702,19.50%,1880,1319990,2684711,4022584
246,寶華⼤飯店,Others,78,795,33.97%,1590,1264446,6360,1272806


In [5]:
df.loc[df['Hotel']=='華泰王⼦⼤飯店']

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
4,華泰王⼦⼤飯店,Taipei,220,1040,15.76%,1212,1260439,8707707,10688801


In [6]:
print(df.dtypes)

Hotel            object
Area             object
Room              int64
Demand            int64
Occupancy        object
ADR               int64
Room_Revenue      int64
F&B_Revenue       int64
Total_Revenue     int64
dtype: object


df['Demand'] = df['Demand'].replace(',', '', regex=True)
df['ADR'] = df['ADR'].replace(',', '', regex=True)
df['Room_Revenue'] = df['Room_Revenue'].replace(',', '', regex=True)
df['F&B_Revenue'] = df['F&B_Revenue'].replace(',', '', regex=True)
df['Total_Revenue'] = df['Total_Revenue'].replace(',', '', regex=True)

In [7]:
df['Occupancy'] = df['Occupancy'].str.rstrip('%').astype('float') / 100.0

In [8]:
df

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue
0,圓⼭⼤飯店,Taipei,500,4346,0.2897,2606,11324332,38593239,61459524
2,台北華國⼤飯店,Taipei,326,1355,0.1385,1823,2469773,13413708,16882436
4,華泰王⼦⼤飯店,Taipei,220,1040,0.1576,1212,1260439,8707707,10688801
6,豪景⼤酒店,Taipei,209,19,0.0030,2140,40654,0,327658
8,國王⼤飯店,Taipei,97,20,0.0069,2483,49661,144303,240392
...,...,...,...,...,...,...,...,...,...
240,台東桂⽥喜來登酒店,Others,278,4919,0.5898,2872,14129274,11694762,27982495
242,⻑榮桂冠酒店(基隆),Others,140,2241,0.5336,2594,5814092,3500771,11010222
244,鈺通⼤飯店,Others,120,702,0.1950,1880,1319990,2684711,4022584
246,寶華⼤飯店,Others,78,795,0.3397,1590,1264446,6360,1272806


In [9]:
df.astype({'Room': 'int64',
          'Demand': 'int64',
          'Occupancy': 'int64',
          'ADR': 'float64',
          'Room_Revenue': 'int64',
          'F&B_Revenue': 'int64',
          'Total_Revenue': 'int64',
          }).dtypes

Hotel             object
Area              object
Room               int64
Demand             int64
Occupancy          int64
ADR              float64
Room_Revenue       int64
F&B_Revenue        int64
Total_Revenue      int64
dtype: object

In [10]:
Day = 30

In [11]:
#Calculate/Add RevPAR and Demand
df['RevPAR'] = df['Occupancy']*df['ADR']
df['Supply'] = df['Room']*Day
df

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply
0,圓⼭⼤飯店,Taipei,500,4346,0.2897,2606,11324332,38593239,61459524,754.9582,15000
2,台北華國⼤飯店,Taipei,326,1355,0.1385,1823,2469773,13413708,16882436,252.4855,9780
4,華泰王⼦⼤飯店,Taipei,220,1040,0.1576,1212,1260439,8707707,10688801,191.0112,6600
6,豪景⼤酒店,Taipei,209,19,0.0030,2140,40654,0,327658,6.4200,6270
8,國王⼤飯店,Taipei,97,20,0.0069,2483,49661,144303,240392,17.1327,2910
...,...,...,...,...,...,...,...,...,...,...,...
240,台東桂⽥喜來登酒店,Others,278,4919,0.5898,2872,14129274,11694762,27982495,1693.9056,8340
242,⻑榮桂冠酒店(基隆),Others,140,2241,0.5336,2594,5814092,3500771,11010222,1384.1584,4200
244,鈺通⼤飯店,Others,120,702,0.1950,1880,1319990,2684711,4022584,366.6000,3600
246,寶華⼤飯店,Others,78,795,0.3397,1590,1264446,6360,1272806,540.1230,2340


In [12]:
#Calculate Monthly Room/Supply/Demand/Room Revenue/F&B Revenue/Total Revenue
date = "2020-06-30"
room = round(df["Room"].sum(),0)
supply = round(df["Supply"].sum(),0)
demand = round(df['Demand'].sum(),0)
room_revenue = df["Room_Revenue"].sum()
fb_revenue = df["F&B_Revenue"].sum()
total_revenue = df["Total_Revenue"].sum()

In [13]:
#Calculate Monthly Occupancy/ADR/RevPAR
occ = round((demand/supply),2)
adr = round((room_revenue/demand), 2)
#revpar = round((occ*adr), 2)
revpar = round((room_revenue/supply), 2)

In [14]:
#Append "Total" Row
df = df.append([{'Hotel': 'Total',
                'Area': 'Total',
                'Room': room,
                'Supply': supply,
                'Occupancy': occ,
                'ADR': adr,
                'Room_Revenue': room_revenue,
                'F&B_Revenue': fb_revenue,
                'Total_Revenue': total_revenue,
                'RevPAR': revpar,
                'Demand': demand}], ignore_index=True)
df.tail()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply
121,⻑榮桂冠酒店(基隆),Others,140,2241,0.5336,2594.0,5814092,3500771,11010222,1384.1584,4200
122,鈺通⼤飯店,Others,120,702,0.195,1880.0,1319990,2684711,4022584,366.6,3600
123,寶華⼤飯店,Others,78,795,0.3397,1590.0,1264446,6360,1272806,540.123,2340
124,陸島酒店,Others,47,544,0.3858,1009.0,548975,84400,634625,389.2722,1410
125,Total,Total,28503,293415,0.34,3364.63,987231644,1505597842,2898421619,1154.54,855090


In [15]:
#Add "Date" Column 
df['Date'] = date
df.tail()

Unnamed: 0,Hotel,Area,Room,Demand,Occupancy,ADR,Room_Revenue,F&B_Revenue,Total_Revenue,RevPAR,Supply,Date
121,⻑榮桂冠酒店(基隆),Others,140,2241,0.5336,2594.0,5814092,3500771,11010222,1384.1584,4200,2020-06-30
122,鈺通⼤飯店,Others,120,702,0.195,1880.0,1319990,2684711,4022584,366.6,3600,2020-06-30
123,寶華⼤飯店,Others,78,795,0.3397,1590.0,1264446,6360,1272806,540.123,2340,2020-06-30
124,陸島酒店,Others,47,544,0.3858,1009.0,548975,84400,634625,389.2722,1410,2020-06-30
125,Total,Total,28503,293415,0.34,3364.63,987231644,1505597842,2898421619,1154.54,855090,2020-06-30


In [16]:
df['Date'] = pd.to_datetime(df['Date']).dt.to_period('D')

In [17]:
#Format 'Summary'
df['Date'] = df['Date'].dt.to_timestamp()
#df = df.style.format({'occ':'{:.0%}',
                    #'demand':'{:.0f}',
                    #'adr':'{:.2f}',
                    #'revpar':'{:.2f}'})

In [18]:
print(df.dtypes)

Hotel                    object
Area                     object
Room                      int64
Demand                    int64
Occupancy               float64
ADR                     float64
Room_Revenue              int64
F&B_Revenue               int64
Total_Revenue             int64
RevPAR                  float64
Supply                    int64
Date             datetime64[ns]
dtype: object


#Create DataFrame 'Summary'
summary = pd.DataFrame({
    'date':date,
    'room':room,
    'supply':supply,
    'demand':demand,
    'room_revenue': room_revenue, 
    'fb_revenue': fb_revenue,
    'total_revenue': total_revenue,
    'occ': occ,
    'adr': adr,
    'revpar':revpar   
}, index=[0])
summary

#Format Summary
summary['date'] = pd.to_datetime(summary['date']).dt.to_period('M')
sep20 = summary.style.format({'occ':'{:.0%}',
                    'demand':'{:.0f}',
                    'adr':'{:.2f}',
                    'revpar':'{:.2f}'})

sep20

df.to_excel(r'static\data\2020\sep20.xlsx', index = False, encoding='utf-8-sig')

In [19]:
df.to_csv(r'..\static\data\2020\jun20.csv', index = False, encoding='utf-8-sig')

In [20]:
from sqlalchemy import create_engine

In [21]:
connection_string = "postgres:postgres@localhost:5432/hotel_db"
engine = create_engine(f'postgresql://{connection_string}', encoding = 'utf-8')

In [22]:
engine.table_names()

['jan20',
 'feb20',
 'mar20',
 'apr20',
 'may20',
 'jun20',
 'jul20',
 'aug20',
 'sep20']

In [23]:
df.to_sql(name='jun20', con=engine, if_exists='append', index=False)