In [18]:
import os
import datetime
import requests
import camelot
import pandas as pd
from deta import Deta
from dotenv import load_dotenv
import constants

load_dotenv()

# Handle on the Deta Drive named "data"; the project key is read from the environment.
deta = Deta(os.environ.get("DETA_PROJECT_KEY"))

drive = deta.Drive("data")

# --- Download the hourly report PDF ---
URL = 'https://www.dropbox.com/s/ckijmipu33z3feg/HourlyReport.pdf?dl=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(URL, allow_redirects=True, timeout=60)
# Stop on an HTTP error instead of saving an error page as the "PDF".
r.raise_for_status()
# The context manager guarantees the file is flushed and closed before camelot reads it.
with open('hourlyreport.pdf', 'wb') as pdf_file:
    pdf_file.write(r.content)

# --- Extract the table from page 1 ---
# `table_areas` restricts parsing to a fixed region of the page
# (NOTE(review): coordinates are assumed to match the current report layout — confirm if the PDF changes).
tables = camelot.read_pdf(
    'hourlyreport.pdf',
    pages='1',
    flavor='stream',
    table_areas=['8,410,1000,50']
)

# --- Clean the extracted table ---
df = tables[0].df.reset_index(drop=True)
df.columns = constants.column_names
# Blank date cells become NaN and are then filled from the last non-blank value above.
df["dateflg"] = df["dateflg"].where(df["dateflg"].str.strip() != '').ffill()
# Every column except the date label is cast to float first, then to int.
for column in df.columns.tolist():
    if column == 'dateflg':
        continue
    df[column] = df[column].astype('float').astype('int')
# Build the timestamp from (hour - 1) and then add one hour back
# (presumably so an hour value of 24 rolls over to 00:00 of the next day — confirm).
df["ds"] = pd.to_datetime(
    df["dateflg"] + " " + (df["timeflg"] - 1).astype(str) + ":00") + datetime.timedelta(hours=1)
# set_index/reset_index moves `ds` to the first column; the raw date/hour columns are dropped.
df = df.set_index('ds').reset_index().drop(['dateflg', 'timeflg'], axis=1)

df.to_csv('current.csv', index=False)

# Upload of the snapshot is left disabled in this cell.
# drive.put(name='current.csv', path='current.csv')

print(df.head())



                   ds  INFLOW_STRETCHER  Infl_Stretcher_cum  \
0 2023-11-22 20:00:00                 4                 127   
1 2023-11-22 19:00:00                 9                 123   
2 2023-11-22 18:00:00                 7                 114   
3 2023-11-22 17:00:00                 8                 107   
4 2023-11-22 16:00:00                 6                  99   

   INFLOW_AMBULATORY  Infl_Ambulatory_cum  Inflow_Total  Inflow_Cum_Total  \
0                  3                   89             7               216   
1                  3                   86            12               209   
2                  5                   83            12               197   
3                  4                   78            12               185   
4                  8                   74            14               173   

   INFLOW_AMBULANCES  Infl_Ambulances_cum  FLS  ...  AMBVERT1  AMBVERTTBS  \
0                  1                   32    1  ...        25          15   
1   

In [13]:
# Load the accumulated history from Deta Drive. `storage_options` is how the
# API key header is supplied for the HTTP download.
# `ds` is parsed while reading so it is a datetime column rather than text,
# which is what the scraped frame's `ds` column holds.
allData = pd.read_csv(
    'https://drive.deta.sh/v1/b0x22rtxtdf/data/files/download?name=allData.csv',
    storage_options={'X-API-Key': os.environ.get("DETA_PROJECT_KEY")},
    parse_dates=['ds'],
)
allData

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,AMBVERT1,AMBVERTTBS,QTrack_TBS,Garage_TBS,RAZ_CONS_MORE2H,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM
0,2021-01-01 01:00:00,1,1,1,1,2,2,0,0,0,...,0,0,,,1,0,0,0,3,3
1,2021-01-01 02:00:00,2,3,1,2,3,5,1,1,0,...,2,2,,,1,1,0,0,3,3
2,2021-01-01 03:00:00,0,3,1,3,1,6,0,1,0,...,2,1,,,0,0,0,0,3,3
3,2021-01-01 04:00:00,1,4,0,3,1,7,1,2,0,...,2,0,,,0,0,0,0,4,3
4,2021-01-01 05:00:00,1,5,1,4,2,9,1,3,0,...,1,1,,,0,0,0,0,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25098,2023-11-14 10:00:00,9,48,5,23,14,71,1,11,0,...,14,10,2.0,0.0,14,0,0,4,11,6
25099,2023-11-14 11:00:00,12,60,10,33,22,93,5,16,0,...,17,12,5.0,0.0,16,0,0,7,10,6
25100,2023-11-14 12:00:00,16,76,9,42,25,118,3,19,0,...,21,16,7.0,0.0,12,0,0,4,10,5
25101,2023-11-14 13:00:00,14,90,7,49,21,139,1,20,0,...,28,24,5.0,0.0,12,0,0,5,10,5


In [21]:
# Make sure the history's `ds` values are datetimes before merging: a string
# timestamp never compares equal to a Timestamp object, so overlapping hours
# would all survive de-duplication.
# Duplicates are resolved per timestamp, keeping the row from the newest frame.
allData = pd.concat(
    [allData.assign(ds=pd.to_datetime(allData["ds"])), df],
    ignore_index=True,
).drop_duplicates(subset='ds', keep='last')
allData

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,AMBVERT1,AMBVERTTBS,QTrack_TBS,Garage_TBS,RAZ_CONS_MORE2H,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM
0,2021-01-01 01:00:00,1,1,1,1,2,2,0,0,0,...,0,0,,,1,0,0,0,3,3
1,2021-01-01 02:00:00,2,3,1,2,3,5,1,1,0,...,2,2,,,1,1,0,0,3,3
2,2021-01-01 03:00:00,0,3,1,3,1,6,0,1,0,...,2,1,,,0,0,0,0,3,3
3,2021-01-01 04:00:00,1,4,0,3,1,7,1,2,0,...,2,0,,,0,0,0,0,4,3
4,2021-01-01 05:00:00,1,5,1,4,2,9,1,3,0,...,1,1,,,0,0,0,0,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25340,2023-11-21 19:00:00,9,116,5,95,14,211,7,39,1,...,28,20,5.0,0.0,5,1,0,3,19,9
25341,2023-11-21 18:00:00,4,107,5,90,9,197,2,32,0,...,26,21,5.0,0.0,9,1,9,3,20,10
25342,2023-11-21 17:00:00,7,103,7,85,14,188,1,30,1,...,24,19,8.0,0.0,12,1,11,0,22,12
25343,2023-11-21 16:00:00,5,96,8,78,13,174,3,29,0,...,19,16,6.0,0.0,12,0,10,0,21,13


In [16]:
# Rebuild the hourly frame from a saved CSV export and apply the report's column names.
df = pd.read_csv('../data/23-11-22.csv')
df.columns = constants.column_names

# Blank date cells become NaN and are then filled from the last non-blank value above.
has_date = df["dateflg"].str.strip() != ''
df["dateflg"] = df["dateflg"].where(has_date).ffill()

# Every column except the date label is cast to float first, then to int.
numeric_columns = [name for name in df.columns.tolist() if name != 'dateflg']
for name in numeric_columns:
    df[name] = df[name].astype('float').astype('int')

# Timestamp = date + (hour - 1):00, shifted forward by one hour.
hour_label = (df["timeflg"] - 1).astype(str) + ":00"
df["ds"] = pd.to_datetime(df["dateflg"] + " " + hour_label) + datetime.timedelta(hours=1)

# Put `ds` first, drop the raw date/hour columns, and order rows chronologically.
kept_columns = ["ds"] + [name for name in df.columns if name not in ("ds", "dateflg", "timeflg")]
df = df[kept_columns].sort_values(by='ds')
df

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,AMBVERT1,AMBVERTTBS,QTrack_TBS,Garage_TBS,RAZ_CONS_MORE2H,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM
191,2023-11-14 01:00:00,1,1,0,0,1,1,1,1,0,...,23,23,1,0,4,1,5,2,12,6
190,2023-11-14 02:00:00,5,6,3,3,8,9,0,1,0,...,24,24,1,0,7,1,1,1,13,6
189,2023-11-14 03:00:00,4,10,3,6,7,16,1,2,0,...,28,27,2,0,7,1,1,1,13,6
188,2023-11-14 04:00:00,2,12,0,6,2,18,1,3,0,...,24,24,3,0,11,1,0,3,13,6
187,2023-11-14 05:00:00,3,15,1,7,4,22,2,5,0,...,24,20,4,0,11,2,0,1,13,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,2023-11-21 20:00:00,2,118,3,98,5,216,1,40,1,...,26,18,3,0,6,2,0,4,18,9
3,2023-11-21 21:00:00,6,124,5,103,11,227,3,43,0,...,29,19,2,0,7,2,0,4,17,9
2,2023-11-21 22:00:00,4,128,2,105,6,233,4,47,0,...,27,21,2,0,7,3,1,2,18,9
1,2023-11-21 23:00:00,5,133,2,107,7,240,1,48,0,...,25,16,2,0,6,2,0,2,17,9


In [22]:
# Convert the timestamp column to datetimes (item access instead of attribute access).
allData["ds"] = pd.to_datetime(allData["ds"])

In [23]:
# Order the merged history by timestamp, oldest first; row labels are left as they were.
allData = allData.sort_values('ds')
allData

Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,AMBVERT1,AMBVERTTBS,QTrack_TBS,Garage_TBS,RAZ_CONS_MORE2H,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM
0,2021-01-01 01:00:00,1,1,1,1,2,2,0,0,0,...,0,0,,,1,0,0,0,3,3
1,2021-01-01 02:00:00,2,3,1,2,3,5,1,1,0,...,2,2,,,1,1,0,0,3,3
2,2021-01-01 03:00:00,0,3,1,3,1,6,0,1,0,...,2,1,,,0,0,0,0,3,3
3,2021-01-01 04:00:00,1,4,0,3,1,7,1,2,0,...,2,0,,,0,0,0,0,4,3
4,2021-01-01 05:00:00,1,5,1,4,2,9,1,3,0,...,1,1,,,0,0,0,0,4,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25319,2023-11-22 16:00:00,6,99,8,74,14,173,2,22,0,...,21,18,12.0,0.0,7,0,1,2,14,10
25318,2023-11-22 17:00:00,8,107,4,78,12,185,3,25,0,...,25,23,8.0,0.0,4,0,0,2,15,10
25317,2023-11-22 18:00:00,7,114,5,83,12,197,2,27,0,...,28,24,8.0,0.0,4,0,0,1,15,10
25316,2023-11-22 19:00:00,9,123,3,86,12,209,4,31,0,...,22,16,4.0,0.0,2,1,0,0,14,10


In [24]:
# Write the merged history to a local CSV without the row index.
allData.to_csv(path_or_buf='allData.csv', index=False)

In [25]:
# Upload the local allData.csv to the Drive under the same name.
drive.put(name='allData.csv', path='allData.csv')


'allData.csv'

In [27]:
import os
import datetime
import requests
import camelot
import pandas as pd
from deta import Deta
from dotenv import load_dotenv
import constants

load_dotenv()

# Handle on the Deta Drive named "data"; both CSV files below are uploaded to it.
deta = Deta(os.environ.get("DETA_PROJECT_KEY"))

drive = deta.Drive("data")

# --- Download the hourly report PDF ---
URL = 'https://www.dropbox.com/s/ckijmipu33z3feg/HourlyReport.pdf?dl=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(URL, allow_redirects=True, timeout=60)
# Stop on an HTTP error instead of saving an error page as the "PDF".
r.raise_for_status()
# The context manager guarantees the file is flushed and closed before camelot reads it.
with open('hourlyreport.pdf', 'wb') as pdf_file:
    pdf_file.write(r.content)

# --- Extract the table from page 1 ---
# `table_areas` restricts parsing to a fixed region of the page
# (NOTE(review): coordinates are assumed to match the current report layout — confirm if the PDF changes).
tables = camelot.read_pdf(
    'hourlyreport.pdf',
    pages='1',
    flavor='stream',
    table_areas=['8,410,1000,50']
)

# --- Clean the extracted table ---
df = tables[0].df.reset_index(drop=True)
df.columns = constants.column_names
# Blank date cells become NaN and are then filled from the last non-blank value above.
df["dateflg"] = df["dateflg"].where(df["dateflg"].str.strip() != '').ffill()
# Every column except the date label is cast to float first, then to int.
for column in df.columns.tolist():
    if column == 'dateflg':
        continue
    df[column] = df[column].astype('float').astype('int')
# Build the timestamp from (hour - 1) and then add one hour back
# (presumably so an hour value of 24 rolls over to 00:00 of the next day — confirm).
df["ds"] = pd.to_datetime(
    df["dateflg"] + " " + (df["timeflg"] - 1).astype(str) + ":00") + datetime.timedelta(hours=1)
# set_index/reset_index moves `ds` to the first column; the raw date/hour columns are dropped.
df = df.set_index('ds').reset_index().drop(['dateflg', 'timeflg'], axis=1)

# --- Publish the latest snapshot ---
df.to_csv('current.csv', index=False)

drive.put(name='current.csv', path='current.csv')

df = df.sort_values(by='ds', ascending=True)

# --- Merge the snapshot into the accumulated history ---
allData = pd.read_csv('https://drive.deta.sh/v1/b0x22rtxtdf/data/files/download?name=allData.csv',
                      storage_options={'X-API-Key': os.environ.get("DETA_PROJECT_KEY")})
# The CSV stores timestamps as text; convert so they compare equal to the snapshot's datetimes.
allData["ds"] = pd.to_datetime(allData["ds"])

# De-duplicate per timestamp: when an hour appears in both frames, the row from
# the fresh snapshot (concatenated last) wins, so a timestamp never appears twice.
allData = (
    pd.concat([allData, df], ignore_index=True)
    .drop_duplicates(subset='ds', keep='last')
    .sort_values(by='ds', ascending=True)
)

allData.to_csv('allData.csv', index=False)

drive.put(name='allData.csv', path='allData.csv')

print(allData.tail(5))





Unnamed: 0,ds,INFLOW_STRETCHER,Infl_Stretcher_cum,INFLOW_AMBULATORY,Infl_Ambulatory_cum,Inflow_Total,Inflow_Cum_Total,INFLOW_AMBULANCES,Infl_Ambulances_cum,FLS,...,AMBVERT1,AMBVERTTBS,QTrack_TBS,Garage_TBS,RAZ_CONS_MORE2H,RAZ_IMCONS_MORE4H,RAZ_XRAY_MORE2H,RAZ_CT_MORE2H1,PSYCH1,PSYCH_WAITINGADM
25335,2023-11-22 11:00:00,15,51,8,37,23,88,2,9,0,...,23,18,3.0,0.0,11,0,0,2,13,10
25336,2023-11-22 12:00:00,9,60,8,45,17,105,4,13,0,...,28,20,6.0,0.0,9,0,0,1,13,10
25337,2023-11-22 13:00:00,6,66,4,49,10,115,1,14,1,...,13,8,7.0,0.0,9,0,2,6,12,9
25338,2023-11-22 14:00:00,14,80,8,57,22,137,3,17,0,...,15,12,9.0,0.0,7,0,2,5,12,10
25339,2023-11-22 15:00:00,13,93,9,66,22,159,3,20,1,...,19,15,12.0,0.0,8,0,2,5,12,9
25340,2023-11-22 16:00:00,6,99,8,74,14,173,2,22,0,...,21,18,12.0,0.0,7,0,1,2,14,10
25341,2023-11-22 17:00:00,8,107,4,78,12,185,3,25,0,...,25,23,8.0,0.0,4,0,0,2,15,10
25342,2023-11-22 18:00:00,7,114,5,83,12,197,2,27,0,...,28,24,8.0,0.0,4,0,0,1,15,10
25343,2023-11-22 19:00:00,9,123,3,86,12,209,4,31,0,...,22,16,4.0,0.0,2,1,0,0,14,10
25344,2023-11-22 20:00:00,4,127,3,89,7,216,1,32,1,...,25,15,2.0,0.0,2,1,1,4,15,10
