In [53]:
import requests
import camelot
import datetime
import os
import pandas as pd
from deta import Deta
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# Create the Deta client from the DETA_PROJECT_KEY environment variable and
# open the Drive named "data".
project_key = os.environ.get("DETA_PROJECT_KEY")
deta = Deta(project_key)
data = deta.Drive("data")


In [44]:
# Download the hourly report PDF and save it as hourlyreport.pdf in the
# working directory.
url = 'https://www.dropbox.com/s/ckijmipu33z3feg/HourlyReport.pdf?dl=1'
# A timeout keeps the notebook from hanging forever if the server stalls.
r = requests.get(url, allow_redirects=True, timeout=60)
# Fail loudly on an HTTP error instead of saving an error page as the "PDF".
r.raise_for_status()
# The context manager flushes and closes the file before anything reads it.
with open('hourlyreport.pdf', 'wb') as pdf_file:
    pdf_file.write(r.content)

1054352

In [45]:
# Comma-separated x-coordinates passed to camelot as the explicit column
# separators for the stream parser (one string per table).
column_separators = '41, 57, 75, 97.5, 115.5, 138, 160, 187, 202, 220, 238, 256, 279, 301, 323, 342, 360, 381, 400, 425, 443, 465, 483, 501, 519, 537, 559.5, 577.5, 600, 627.5, 652, 667, 690, 706.5, 730, 753, 778, 798, 822, 839.5, 855.5, 881, 906, 930, 956, 978'

# Parse only page 1 of the downloaded report.
tables = camelot.read_pdf(
    'hourlyreport.pdf',
    pages='1',
    flavor='stream',
    columns=[column_separators],
)

In [27]:
# Names assigned, in order, to the columns of the table extracted from the
# hourly report. The first two entries are the date and hour fields; the
# remaining entries are converted to integers further down.
column_names = [
    "dateflg",
    "timeflg",
    "INFLOW_STRETCHER",
    "Infl_Stretcher_cum",
    "INFLOW_AMBULATORY",
    "Infl_Ambulatory_cum",
    "Inflow_Total",
    "Inflow_Cum_Total",
    "INFLOW_AMBULANCES",
    "Infl_Ambulances_cum",
    "FLS",
    "CUM_ADMREQ",
    "CUM_BA1",
    "WAITINGADM",
    "TTStr",
    "TRG_HALLWAY1",
    "TRG_HALLWAY_TBS",
    "reoriented_cum",
    "reoriented_cum_MD",
    "QTRACK1",
    "RESUS",
    "Pod_T",
    "POD_GREEN",
    "POD_GREEN_TBS",
    "POD_YELLOW",
    "POD_YELLOW_TBS",
    "POD_ORANGE",
    "POD_ORANGE_TBS",
    "POD_CONS_MORE2H",
    "POD_IMCONS_MORE4H",
    "POD_XRAY_MORE2H",
    "POD_CT_MORE2H",
    "POST_POD1",
    "VERTSTRET",
    "RAZ_TBS",
    "RAZ_LAZYBOY",
    "RAZ_WAITINGREZ",
    "AMBVERT1",
    "AMBVERTTBS",
    "QTrack_TBS",
    "Garage_TBS",
    "RAZ_CONS_MORE2H",
    "RAZ_IMCONS_MORE4H",
    "RAZ_XRAY_MORE2H",
    "RAZ_CT_MORE2H1",
    "PSYCH1",
    "PSYCH_WAITINGADM",
]

In [62]:
# Build the tidy hourly frame from the first table camelot extracted.
# The first two extracted rows are skipped (presumably the report's header
# rows -- confirm against the PDF).
df = tables[0].df.loc[2:].reset_index(drop=True)
df.columns = column_names

# Blank date cells become missing values and are then filled from the nearest
# dated row above. Stripping also removes stray whitespace before parsing.
report_dates = df['dateflg'].str.strip()
df['dateflg'] = report_dates.where(report_dates != '').ffill()

# Every column except the date is cast to integer. Going through float first
# also accepts decimal-formatted text such as "3.0".
count_columns = [name for name in df.columns if name != 'dateflg']
df = df.astype({name: 'float' for name in count_columns}).astype(
    {name: 'int' for name in count_columns}
)

# Timestamp = report date + hour offset. Adding the hour as a timedelta lets
# hour 24 roll over to 00:00 of the next day, and hour 0 works too, without
# building an "H:00" string from (hour - 1) and shifting it back by one hour.
timestamps = pd.to_datetime(df['dateflg'], format='mixed') + pd.to_timedelta(
    df['timeflg'], unit='h'
)

# Put `ds` first and drop the raw date/hour columns it replaces.
df.insert(0, 'ds', timestamps)
df = df.drop(columns=['dateflg', 'timeflg'])


In [56]:
# Preview the first rows of the cleaned frame (bare expression -> rich display).
df.head()

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,AMBVERT1,AMBVERTTBS,QTrack_TBS,Garage_TBS,RAZ_CONS_MORE2H,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM
0,2023-11-16 23:00:00,2,162,1,86,3,248,0,34,0,...,22,17,2,0,7,1,1,4,5,3
1,2023-11-16 22:00:00,6,160,4,85,10,245,1,34,0,...,23,17,2,0,7,1,1,5,6,4
2,2023-11-16 21:00:00,7,154,0,81,7,235,0,33,0,...,27,22,1,0,7,1,2,4,6,4
3,2023-11-16 20:00:00,9,147,5,81,14,228,2,33,1,...,29,25,1,0,7,1,0,1,7,4
4,2023-11-16 19:00:00,9,138,6,76,15,214,1,31,0,...,25,21,0,0,5,1,1,3,8,5


In [57]:
# Write the cleaned frame to current.csv in the working directory, without
# the positional index column.
df.to_csv('current.csv', index=False)

In [58]:
# Upload the local current.csv to the "data" Drive under the same name.
data.put(name='current.csv', path='current.csv')

'current.csv'

In [63]:
import constants

In [64]:
# Display the column list exported by the local `constants` module.
# NOTE(review): the displayed values match the inline `column_names` list
# defined earlier in this notebook -- consider keeping a single source.
constants.column_names

['dateflg',
 'timeflg',
 'INFLOW_STRETCHER',
 'Infl_Stretcher_cum',
 'INFLOW_AMBULATORY',
 'Infl_Ambulatory_cum',
 'Inflow_Total',
 'Inflow_Cum_Total',
 'INFLOW_AMBULANCES',
 'Infl_Ambulances_cum',
 'FLS',
 'CUM_ADMREQ',
 'CUM_BA1',
 'WAITINGADM',
 'TTStr',
 'TRG_HALLWAY1',
 'TRG_HALLWAY_TBS',
 'reoriented_cum',
 'reoriented_cum_MD',
 'QTRACK1',
 'RESUS',
 'Pod_T',
 'POD_GREEN',
 'POD_GREEN_TBS',
 'POD_YELLOW',
 'POD_YELLOW_TBS',
 'POD_ORANGE',
 'POD_ORANGE_TBS',
 'POD_CONS_MORE2H',
 'POD_IMCONS_MORE4H',
 'POD_XRAY_MORE2H',
 'POD_CT_MORE2H',
 'POST_POD1',
 'VERTSTRET',
 'RAZ_TBS',
 'RAZ_LAZYBOY',
 'RAZ_WAITINGREZ',
 'AMBVERT1',
 'AMBVERTTBS',
 'QTrack_TBS',
 'Garage_TBS',
 'RAZ_CONS_MORE2H',
 'RAZ_IMCONS_MORE4H',
 'RAZ_XRAY_MORE2H',
 'RAZ_CT_MORE2H1',
 'PSYCH1',
 'PSYCH_WAITINGADM']