In [46]:
import pandas as pd
import pickle
import numpy as np
import sys
import requests
import json
sys.path.insert(1, '../src')

## Basic Statistics

In [33]:
with open(r'../data/STATS.pkl', 'rb') as f:
    stats_df = pickle.load(f)

In [2]:
from webscraper import fetch_stat
stats_df = fetch_stat()

In [25]:
stats_df

Unnamed: 0,Death,Confirmed,Investigating,Reported
0,1,24,158,1100


In [90]:
def fetch_stat():
    """
    Return a DataFrame that represent death, confirmed, investigating and reported numbers respectively.
    
    There are two elements in the json that contain the required value: 
    "allBotWarsLatestFigures" and "allWarsLatestFiguresOverride".
    
    The values in allBotWarsLatestFigures are the record in history for every day.
    The values in allWarsLatestFiguresOverride are the live values.
    Whenenver an attribute in allWarsLatestFiguresOverride is not an empty string,
    it overrides the values in allBotWarsLatestFigures.
    
    """
    statnames = ['Death', 'Confirmed', 'Investigating', 'Reported']
    
    page = requests.get('https://wars.vote4.hk/page-data/en/page-data.json')
    data = json.loads(page.content)
    print(json.dumps(data['result']['data'], indent=4, sort_keys=True))
    stats_yesterday = data['result']['data']['allBotWarsLatestFigures']['edges'][0]
    stats_live = data['result']['data']['allWarsLatestFiguresOverride']['edges'][0]
    
    res = {}
    
    for attr in ['death', 'confirmed', 'investigating', 'reported', 'discharged', 'ruled_out']:
        if stats_live['node'][attr] != "":
            res[attr] = stats_live['node'][attr]
        else:
            res[attr] = stats_yesterday['node'][attr]
    
    df = pd.DataFrame(data=res, index=[0])
    
    return df

In [91]:
fetch_stat()

{
    "allBotWarsLatestFigures": {
        "edges": [
            {
                "node": {
                    "confirmed": "36",
                    "date": "2020-02-09",
                    "death": "1",
                    "discharged": "0",
                    "investigating": "129",
                    "reported": "1231",
                    "ruled_out": "1066",
                    "time": "12:00"
                }
            },
            {
                "node": {
                    "confirmed": "26",
                    "date": "2020-02-08",
                    "death": "1",
                    "discharged": "0",
                    "investigating": "141",
                    "reported": "1202",
                    "ruled_out": "1035",
                    "time": "12:00"
                }
            },
            {
                "node": {
                    "confirmed": "26",
                    "date": "2020-02-07",
                    "death": "1",
               

Unnamed: 0,death,confirmed,investigating,reported
0,1,38,129,1231


In [4]:
with open(r'../data/STATS.pkl', 'wb') as f:
    pickle.dump(stats_df, f)

## Hospitals Addresses

In [26]:
with open(r'../data/HOSPITALS.pkl', 'rb') as f:
    hospitals_df = pickle.load(f)

In [28]:
hospitals_df.head()

Unnamed: 0,id,address,latitude,longitude,category
0,0,Alice Ho Miu Ling Nethersole Hospital,22.458515,114.174944,Hospital
1,1,North District Hospital,22.496759,114.124606,Hospital
2,2,North Lantau Hospital,22.282335,113.939275,Hospital
3,3,Pok Oi Hospital,22.445444,114.041639,Hospital
4,4,Pamela Youde Nethersole Eastern Hospital,22.270224,114.23677,Hospital


## Addresses

In [13]:
from webscraper import fetch_highrisk

In [16]:
high_risk_df = fetch_highrisk()

In [38]:
with open(r'../data/HIGH_RISK.pkl', 'rb') as f:
    high_risk_df = pickle.load(f)

In [39]:
high_risk_df

Unnamed: 0,id,sub_district_zh,sub_district_en,action_zh,action_en,location_en,location_zh,remarks_en,remarks_zh,source_url_1,source_url_2,start_date,end_date,type,case_no,case
0,12c65515-4747-5f77-ab29-f245927c8495,元朗,Yuen Long,家居檢疫,Home Confinees,"Block 3, Sereno Verde",蝶翠峰 3座,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,2020-02-06,2020-02-20,self,-,
1,231b10bc-0194-5da0-878d-9a062b23b7eb,葵涌,Kwai Chung,家居檢疫,Home Confinees,"Block A, Kwai Chung Building",葵涌大廈 A座,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,2020-02-04,2020-02-18,self,-,
2,2cbe3b86-cc9e-5905-8616-8c50e5c6ac86,青衣,Tsing Yi,家居檢疫,Home Confinees,"Block 1, Villa Esplanada",灝景灣 1座,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,2020-02-04,2020-02-18,self,-,
3,3c8a0f8d-5ebc-5348-8046-e54de8062c5c,鑽石山,Diamond Hill,家居檢疫,Home Confinees,"Pik Fung House, Fung Tak Estate",鳳德村碧鳳樓,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,2020-02-02,2020-02-16,self,-,
4,ee2399e4-2cb3-5075-92e6-835cc8719edd,沙田,Sha Tin,家居檢疫,Home Confinees,"Kwong Ning Building, Shatin Centre",沙田中心廣寧大廈,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,2020-01-31,2020-02-14,self,-,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,d56ff11f-70ed-578d-a60f-198600738946,秀茂坪,Sau Mau Ping,求醫,Medical,"Orthopedics and Traumatic Clinic, United Chris...",基督教聯合醫院矯形及創傷科（骨科）門診部,,,https://hk.news.appledaily.com/local/20200201/...,,2020-01-15,2020-01-15,self,14,14
171,be65e274-5dea-5868-a1ed-c9998db9b888,境外,Outside HK,逗留,Stay,Macau,澳門,,,https://www.thestandnews.com/society/%E6%AD%A6...,https://www.scmp.com/news/hong-kong/health-env...,2020-01-10,2020-01-14,self,12,12
172,1b7ec0c5-0773-59e4-a3de-8e10d9f36aac,上水,Sheung Shui,離港,Departure,Lo Wu Control Point,羅湖管制站,,,https://news.rthk.hk/rthk/ch/component/k2/1506...,https://hk.news.appledaily.com/local/20200201/...,2020-01-10,2020-01-10,self,14,14
173,69abfbab-30f6-5434-95df-8f5d7f3019b1,上水,Sheung Shui,抵港,Arrival,Lo Wu Control Point,羅湖管制站,,,https://news.rthk.hk/rthk/ch/component/k2/1506...,https://hk.news.appledaily.com/local/20200201/...,2020-01-10,2020-01-10,self,14,14


In [70]:
with open(r'../data/ADDRESS.pkl', 'rb') as f:
    address_df = pickle.load(f)

In [71]:
address_df = address_df.loc[
    0:0
]

In [72]:
address_df

Unnamed: 0,id,sub_district_zh,sub_district_en,location_en,location_zh,longitude,latitude
0,5c0c68c7-f6a2-51f0-9fb8-943b360316fc,葵涌,Kwai Chung,"Kwai Chung Building, 10-30 Shek Pui Stret",石貝街 10-30 號葵涌大廈,114.201,22.345


In [73]:
address_df.loc[0, 'longitude'] = 114.136282
address_df.loc[0, 'latitude'] = 22.364730

In [74]:
with open(r'../data/ADDRESS.pkl', 'wb') as f:
    pickle.dump(address_df, f)

In [35]:
high_risk_df.head(10)

Unnamed: 0,id,sub_district_zh,sub_district_en,action_zh,action_en,location_en,location_zh,remarks_en,remarks_zh,source_url_1,source_url_2,start_date,end_date,type,case_no,case
0,f434cb97-b3c8-5884-9daa-53fbcb7ba418,鑽石山,Diamond Hill,家居檢疫,Home Confinees,"Pik Fung House, Fung Tak Estate",鳳德村碧鳳樓,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,04/2,18/2,self,-,
1,0e65810a-5340-5cab-9df3-7d986c017325,沙田,Sha Tin,家居檢疫,Home Confinees,"Kwong Ning Building, Shatin Centre",沙田中心廣寧大廈,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,02/2,16/2,self,-,
2,995f78dd-31e8-55cf-806a-364f7e8327ad,天水圍,Tin Shui Wai,家居檢疫,Home Confinees,"Block 5, Sherwood Court, Kingswood Villas",嘉湖山莊賞湖居 5 座,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,31/1,14/2,self,-,
3,d65d9703-5442-5ae6-a9cc-ce9ca2e9ffa9,屯門,Tuen Mun,家居檢疫,Home Confinees,"Tip Ling House, Butterfly Estate",蝴蝶邨蝶翎樓,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,30/1,13/2,self,-,
4,139ef4e1-1688-5ccd-b960-6d1e7f003eac,藍田,Lam Tin,家居檢疫,Home Confinees,"Ping Shing House, Ping Tin Estate",平田邨平誠樓,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,30/1,13/2,self,-,
5,209f58c1-0b44-5748-b406-483d7138af49,上水,Sheung Shui,家居檢疫,Home Confinees,15 Po Sheung Tsuen,莆上村 15 號,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,30/1,13/2,self,-,
6,6fc9aecf-abcd-5eb9-a623-c964011051c4,粉嶺,Fanling,家居檢疫,Home Confinees,"Block 11, Dawning Views",牽晴間 11 座,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,30/1,13/2,self,-,
7,c0f5bc6a-82c8-5091-9e38-680d5d05299e,沙田,Sha Tin,家居檢疫,Home Confinees,"Block 5, Jubilee Garden",銀禧花園 5 座,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,30/1,13/2,self,-,
8,e7a1b5c4-abec-5818-ac62-413f25b3fe9c,大圍,Tai Wai,家居檢疫,Home Confinees,"Tower 5, Phase 2 Festival City",大圍名城 2 期 5 座,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,30/1,13/2,self,-,
9,cc3341f8-7e9a-5679-baf9-01ae2ae15f98,深水埗,Sham Shui Po,家居檢疫,Home Confinees,"Wing On Building, 206 Fuk Wah Street",福華街 206 號榮安大廈,,,https://www.chp.gov.hk/files/pdf/list_of_build...,https://www.chp.gov.hk/files/pdf/list_of_build...,30/1,13/2,self,-,


In [39]:
address_df = high_risk_df[['id', 'sub_district_zh', 'sub_district_en', 'location_en', 'location_zh']].copy()

In [40]:
address_df['longitude'] = None
address_df['latitude'] = None

In [41]:
from wuhan_functions import get_coordinates

In [42]:
with open(r'../data/old_format/ADDRESS.pkl', 'rb') as f:
    old_address_df = pickle.load(f)

In [43]:
for idx, row in address_df.iterrows():
    if old_address_df[old_address_df['address'] == row['location_en']].shape[0] == 1:
        address_df.at[idx, 'longitude'] = old_address_df[old_address_df['address'] == row['location_en']]['longitude'].values[0]
        address_df.at[idx, 'latitude'] = old_address_df[old_address_df['address'] == row['location_en']]['latitude'].values[0]

In [44]:
address_df.head()

Unnamed: 0,id,sub_district_zh,sub_district_en,location_en,location_zh,longitude,latitude
0,5c0c68c7-f6a2-51f0-9fb8-943b360316fc,葵涌,Kwai Chung,"Kwai Chung Building, 10-30 Shek Pui Stret",石貝街 10-30 號葵涌大廈,114.163,22.2793
1,f434cb97-b3c8-5884-9daa-53fbcb7ba418,鑽石山,Diamond Hill,"Pik Fung House, Fung Tak Estate",鳳德村碧鳳樓,114.201,22.3443
2,0e65810a-5340-5cab-9df3-7d986c017325,沙田,Sha Tin,"Kwong Ning Building, Shatin Centre",沙田中心廣寧大廈,114.19,22.383
3,139ef4e1-1688-5ccd-b960-6d1e7f003eac,天水圍,Tin Shui Wai,"Block 5, Sherwood Court, Kingswood Villas",嘉湖山莊賞湖居 5 座,114.0,22.4521
4,209f58c1-0b44-5748-b406-483d7138af49,藍田,Lam Tin,"Ping Shing House, Ping Tin Estate",平田邨平誠樓,114.237,22.3055


In [45]:
for idx, row in address_df[address_df['longitude'].isna()].iterrows():
    print(row['location_zh'])
    address_name = row['location_en'] + ', ' + row['sub_district_en']
    if 'high speed rail' in address_name.lower():
        latitude = 22.304080
        longitude = 114.166501
    elif '航空' in row['location_zh']:
        latitude = 22.308007
        longitude = 113.918803
    else:
        latitude, longitude = get_coordinates(address_name)
    address_df.at[idx, 'latitude'] = latitude
    address_df.at[idx, 'longitude'] = longitude

清河村清照樓
Getting the coordinates for new address: Ching Chiu House, Ching Ho Estate, Sheung Shui.
天慈村慈平樓
Getting the coordinates for new address: Tsz Ping House, Tin Tsz Estate, Tin Shui Wai.
香港理工大學勵志堂
Getting the coordinates for new address: Lizhi Hall, The Hong Kong Polytechnic University, Hung Hom.
天富苑榮富閣
Getting the coordinates for new address: Wing Fu House, Tin Fu Court, Tin Shui Wai.
鴨脷洲邨利添樓
Getting the coordinates for new address: Lei Tim House, Ap Lei Chau Estate, Ap Lei Chau.
大窩口邨富德樓
Getting the coordinates for new address: Fu Tak House, Tai Wo Hau Estate, Kwai Chung.
港景峰 1 座
Getting the coordinates for new address: Block 1, Victoria Towers, Jordan.
黃埔花園第 2 期 10 座
Getting the coordinates for new address: Block 10, Cherry Mansions, Whampoa Garden (Site2), Hung Hom.
媽横路福順樓
Getting the coordinates for new address: Fook Shun Building, 45-67 Ma Wang Road, Yuen Long.
嘉湖山莊麗湖居 4 座
Getting the coordinates for new address: Block 4, Lynwood Court, Kingswood Villas (Phase 5), Tin Shui Wai.

西九龍高鐵站
Getting the coordinates for new address: Hong Kong West Kowloon Station, Jordan.
深圳
Getting the coordinates for new address: Shen Zhen, Outside HK.
羅湖管制站
Getting the coordinates for new address: Lo Wu Control Point, Sheung Shui.
羅湖港鐵站
Getting the coordinates for new address: Lo Wu Station, Sheung Shui.
威爾斯親王醫院急症室
Getting the coordinates for new address: Prince of Wales Hospital Accident & Emergency Department, Sha Tin.
西九龍高鐵站
Getting the coordinates for new address: Hong Kong West Kowloon Station, Jordan.
高鐵 G1013
高鐵 G6543
西九龍高鐵站
Getting the coordinates for new address: Hong Kong West Kowloon Station, Jordan.
高鐵 G1013
高鐵 G6543
香港國際機場
Getting the coordinates for new address: Hong Kong International Airport, Lantau Island.
澳門
Getting the coordinates for new address: Macau, Outside HK.
羅湖管制站
Getting the coordinates for new address: Lo Wu Control Point, Sheung Shui.
羅湖管制站
Getting the coordinates for new address: Lo Wu Control Point, Sheung Shui.
廣東順德
Getting the coordinates for new 

In [48]:
with open(r'../data/ADDRESS.pkl', 'wb') as f:
    pickle.dump(address_df, f)

## Casese

In [5]:
with open(r'../data/CASES.pkl', 'rb') as f:
    cases_df = pickle.load(f)

In [10]:
from webscraper import fetch_cases

cases_df = fetch_cases()

In [11]:
cases_df

Unnamed: 0,case_no,onset_date,confirmation_date,gender,age,hospital_zh,hospital_en,status,type_zh,type_en,citizenship_zh,citizenship_en,detail_zh,detail_en,classification,source_url
8,25,,2020-02-07,M,58,北區醫院,North District Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,患者在珠海及香港北區坑頭雍翠苑居住，並於珠海及澳門工作，2 月 4 日出現發燒和咳嗽。他 2...,The patient has both Zhuhai and Hong Kong (Ser...,imported,https://hk.on.cc/hk/bkn/cnt/news/20200207/bkn-...
7,26,,2020-02-07,M,42,威爾斯親王醫院,Prince of Wales Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,患者居於沙田御龍山第10座。2月 3 日出現發燒和咳嗽，2 月 6 日到威爾斯親王醫院求醫，...,"The patient resides in Block 10, The Palazzo, ...",imported,https://news.rthk.hk/rthk/ch/component/k2/1507...
11,22,,2020-02-06,F,55,瑪嘉烈醫院,Princess Margaret Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,該名女病人是第 21 宗確診個案的太太。 早前在東莞工作， 1月 17 日回港， 1 月 ...,The patient is the spouse of case no. 21. She ...,local_possibly_close_contact,https://news.rthk.hk/rthk/ch/component/k2/1507...
10,23,,2020-02-06,F,63,東區尤德夫人那打素醫院,Pamela Youde Nethersole Eastern Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,患者1月26日開始咳嗽和疲倦，曾向中醫求診；2 月 5 日向私家醫生求診，再到東區醫院急症室...,The patient developed cough and fatigue on Jan...,local_unknown_source,https://hk.news.appledaily.com/local/20200206/...
9,24,,2020-02-06,F,65,威爾斯親王醫院,Prince of Wales Hospital,serious,確診,Confirmed,香港,Hong Kong,患者為一名獨居於沙田碩門邨瑞碩樓、有長期病患的65歲女子，本身有甲狀腺機能低下症及高血壓。1...,The patient is a 65-year-old female who lives ...,local_unknown_source,https://hk.news.appledaily.com/local/20200206/...
15,19,,2020-02-05,F,28,瑪嘉烈醫院,Princess Margaret Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,該名女病人是第 17 宗確診個案的女兒。 2 月 4 日發燒，到聯合醫院隔離。,The female patient is the daughter of case no....,local_close_contact,https://news.rthk.hk/rthk/ch/component/k2/1506...
12,21,,2020-02-05,M,56,瑪嘉烈醫院,Princess Margaret Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,男患者在青衣灝景灣一座居住，在深圳工作，長期往返兩地。患者曾在 1 月 21 日到過深圳，在...,The male patient lives in Tsing Yi and works i...,local_possibly,https://www.info.gov.hk/gia/general/202002/05/...
13,20,,2020-02-05,F,56,瑪嘉烈醫院,Princess Margaret Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,該名女病人是第 17 宗確診個案的妻子。 2 月 4 日發燒，到聯合醫院隔離。,The female patient is the spouse of case no. 1...,local_close_contact,https://news.rthk.hk/rthk/ch/component/k2/1506...
16,18,,2020-02-05,M,25,瑪嘉烈醫院,Princess Margaret Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,男患者住在土瓜灣馬頭圍道183號， 1 月 14 日到浸會醫院求醫，確診感染甲型流感，獲處方...,The male patient lives at 183 Ma Tau Wai Road....,local_unknown_source,https://news.rthk.hk/rthk/ch/component/k2/1506...
17,17,,2020-02-04,M,60,瑪嘉烈醫院,Princess Margaret Hospital,hospitalised,確診,Confirmed,香港,Hong Kong,男患者已退休，住在藍田康田苑裕康閣。1月22日起發燒及疲倦，1月30日有氣喘，病人分別於1月...,The male patient has retired and lives in Yue ...,local_unknown_source,https://www.thestandnews.com/society/%E6%AD%A6...


In [56]:
with open(r'../data/CASES.pkl', 'wb') as f:
    pickle.dump(cases_df, f)

## Awaiting

In [2]:
with open(r'../data/AWAITING.pkl', 'rb') as f:
    awaiting_df = pickle.load(f)

In [8]:
from webscraper import fetch_awaiting_time

awaiting_df = fetch_awaiting_time()

In [5]:
with open(r'../data/AWAITING.pkl', 'wb') as f:
    pickle.dump(awaiting_df, f)

In [9]:
awaiting_df

Unnamed: 0,name_zh,name_en,hospCode,hospTimeEn,topWait,district_zh,district_en,sub_district_zh,sub_district_en,topWait_value
0,明愛醫院,Caritas Medical Centre,CMC,8/2/2020 11:30am,< 1,深水埗,Sham Shui Po,深水埗,Sham Shui Po,0
1,廣華醫院,Kwong Wah Hospital,KWH,8/2/2020 11:30am,< 1,油尖旺,Yau Tsim Mong,旺角,Mong Kok,0
2,北大嶼山醫院,North Lantau Hospital,NLT,8/2/2020 11:30am,< 1,離島,Islands,東涌,Tung Chung,0
3,東區尤德夫人那打素醫院,Pamela Youde Nethersole Eastern Hospital,PYN,8/2/2020 11:30am,< 1,東區,Eastern,柴灣,Chai Wan,0
4,伊利沙伯醫院,Queen Elizabeth Hospital,QEH,8/2/2020 11:30am,< 1,油尖旺,Yau Tsim Mong,油麻地,Yau Ma Tei,0
5,瑪麗醫院,Queen Mary Hospital,QMH,8/2/2020 11:30am,< 1,中西區,Central & Western,薄扶林,Pok Fu Lam,0
6,長洲醫院,St John Hospital,SJH,8/2/2020 11:30am,< 1,離島,Islands,長洲,Cheung Chau,0
7,將軍澳醫院,Tseung Kwan O Hospital,TKO,8/2/2020 11:30am,< 1,西貢,Sai Kung,將軍澳,Tseung Kwan O,0
8,仁濟醫院,Yan Chai Hospital,YCH,8/2/2020 11:30am,< 1,荃灣,Tsuen Wan,荃灣,Tsuen Wan,0
9,雅麗氏何妙齡那打素醫院,Alice Ho Miu Ling Nethersole Hospital,AHN,8/2/2020 11:30am,> 1,大埔,Tai Po,大埔,Tai Po,1


In [5]:
awaiting_df['topWait_value'].max()


numpy.int64

In [7]:
{i: f'> {i} hr' for i in np.arange(0, awaiting_df['topWait_value'].max() + 1)}[awaiting_df['topWait_value'].max()]

'> 5 hr'