In [21]:
from lxml import html
import requests
import pandas as pd

def fetch_highrisk():
    """b
    Returns a list of dictionary that contains the high risk area information.

    """
    page = requests.get('https://wars.vote4.hk/en/high-risk')
    tree = html.fromstring(page.content)

    boxes = tree.xpath('//*[@id="gatsby-focus-wrapper"]/div/main/div[2]/div[contains(@class, "Card__StyledBox-sc-6m23vi-0 cNwpZn")]')

    res = []

    for box in boxes:
        district = box.xpath('./div[1]/div[1]/div[1]/div/span[contains(@class, "MuiTypography-body2")]/text()')
        address = box.xpath('./div[1]/div[1]/div[1]/div/span[contains(@class, "MuiTypography-h6")]/text()')
        msg = box.xpath('./div[1]/span/text()')

        res.append({'district': str(district[0]), 'address': str(address[0]), 'msg': str(msg[0])})

    return res

In [12]:
fetch_highrisk()

[{'district': 'Jordan',
  'address': 'West Kowloon Station',
  'msg': 'Multiple confirmed cases of novel coronavirus infection came into Hong Kong via High Speed Rail'},
 {'district': 'Kwai Chung',
  'address': 'Luk Kwai House Kwai Chung Estate',
  'msg': 'The flat where a confirmed novel coronavirus patient lives'},
 {'district': 'Sheung Shui',
  'address': 'Lo Wu Customs Checkpoint',
  'msg': 'A confirmed case went to Mainland via this customs checkpoint on 10 January.'},
 {'district': 'Kowloon City',
  'address': 'Kai Tak Cruise Terminal',
  'msg': 'A confirmed case arrived at this cruise terminal via cruise on 10 January.'},
 {'district': '-',
  'address': '"Enhanted Princess" Cruise',
  'msg': 'A confirmed case traveled on this cruise on 20 - 25 January.'},
 {'district': 'Hung Hom',
  'address': 'Block 1, Site 11, Whampoa Garden',
  'msg': 'A confirmed case of novel coronavirus, the patient has been to Wuhan in January 21 to 23; their families has now been relocated to the quarant

In [40]:
def fetch_cases():
    """
    Returns a list of dictionary that contains the confirmed cases information.

    """
    page = requests.get('https://wars.vote4.hk/en/cases')
    tree = html.fromstring(page.content)

    boxes = tree.xpath('//*[@id="gatsby-focus-wrapper"]/div/main/div[2]/div[contains(@class, "CaseCard__WarsCaseContainer-zltyy4-0")]')

    res = []

    for box in boxes:
        s = box.xpath('./div[1]/div/text()')
        casenum = s[0]
        status = s[1]
        age_and_gender = box.xpath('./div[2]/div[1]/text()')[0]
        date = box.xpath('./div[3]/div[1]/div[1]/text()')[0]
        residence = box.xpath('./div[3]/div[1]/div[2]/text()')[0]
        hospital = box.xpath('./div[3]/div[1]/div[3]/text()')[0]
        desc = box.xpath('./div[4]/p/text()')[0]
        
        #get age from age_and_gender
        age = age_and_gender.replace('Age', '').replace('Male', '').replace('Female', '').strip()
        
        #get gender from age_and_gender
        gender = None
        if 'male' in age_and_gender.lower():
            gender = 'Male'
        elif 'female' in age_and_gender.lower():
            gender = 'Female'

        res.append({
            'casenum': str(casenum),
            'status': str(status),
            'age': int(age),
            'gender': str(gender),
            'date': str(date),
            'residence': str(residence),
            'hospital': str(hospital),
            'desc': str(desc)
        })

    return pd.DataFrame(res)

In [41]:
pd.DataFrame(fetch_cases())

Unnamed: 0,casenum,status,age,gender,date,residence,hospital,desc
0,#15 (Confirmed),Hospitalised,72,Male,2020-02-02,Hong Kong,Queen Elizabeth Hospital,The female patient is the mother of case no 13...
1,#14 (Confirmed),Hospitalised,80,Male,2020-02-01,Hong Kong,Princess Margaret Hospital,The patient lives in Luk Kwai House in Kwai Ch...
2,#13 (Confirmed),Hospitalised,39,Male,2020-01-31,Hong Kong,Queen Elizabeth Hospital,The male patient with long term illness lives ...
3,#12 (Confirmed),Hospitalised,75,Male,2020-01-30,Hong Kong,Princess Margaret Hospital,The 75 year-old patient lives in Cheung Hong E...
4,#11 (Confirmed),Hospitalised,37,Male,2020-01-30,Hong Kong,Princess Margaret Hospital,\nThe female patient is the daughter of a coup...
5,#10 (Confirmed),Serious,72,Male,2020-01-29,Wuhan,Princess Margaret Hospital,The male patient arrived in Hong Kong by plane...
6,#9 (Confirmed),Hospitalised,73,Male,2020-01-29,Wuhan,Princess Margaret Hospital,The female patient arrived in Hong Kong by pla...
7,#8 (Confirmed),Hospitalised,64,Male,2020-01-26,Wuhan,Princess Margaret Hospital,The 64 year-old male patient from Wuhan has lo...
8,#7 (Confirmed),Critical,68,Male,2020-01-26,Hong Kong,Princess Margaret Hospital,The 68 year-old female patient lives in Fan Li...
9,#6 (Confirmed),Hospitalised,47,Male,2020-01-26,Hong Kong,Princess Margaret Hospital,The patient lied about his job of an accountan...


In [42]:
with open(r'../assets/data/CASES.pkl', 'wb') as f:
    pickle.dump(pd.DataFrame(fetch_cases()), f)

In [47]:
def fetch_awaiting_time():
    """
    Return a list of dictionary that contains the hospital awaiting time.

    """
    page = requests.get('https://wars.vote4.hk/en/ae-waiting-time')
    tree = html.fromstring(page.content)

    boxes = tree.xpath('//*[@id="gatsby-focus-wrapper"]/div/main/div[2]/div[contains(@class, "Card__StyledBox-sc-6m23vi-0 cNwpZn")]')

    res = []

    for box in boxes:
        district = box.xpath('./div[1]/div[1]/p[1]/text()')[0]
        hospital = box.xpath('./div[1]/div[1]/p[2]/text()')[0]
        time = box.xpath('./div[1]/h6/text()')[0]

        res.append({
             'district': str(district),
             'hospital': str(hospital),
             'time': str(time)
             })

    return pd.DataFrame(res)

In [48]:
fetch_awaiting_time()

Unnamed: 0,district,hospital,time
0,Sham Shui Po,Caritas Medical Centre,< 1 hr
1,Mong Kok,Kwong Wah Hospital,< 1 hr
2,Kwai Chung,Princess Margaret Hospital,< 1 hr
3,Yuen Long,Pok Oi Hospital,< 1 hr
4,Tsuen Wan,Yan Chai Hospital,< 1 hr
5,Tung Chung,North Lantau Hospital,> 1 hr
6,Chai Wan,Pamela Youde Nethersole Eastern Hospital,> 1 hr
7,Cheung Chau,St John Hospital,> 1 hr
8,Tin Shui Wai,Tin Shui Wai Hospital,> 1 hr
9,Tai Po,Alice Ho Miu Ling Nethersole Hospital,> 2 hr


In [49]:
with open(r'../assets/data/AWAITING.pkl', 'wb') as f:
    pickle.dump(pd.DataFrame(fetch_awaiting_time()), f)

In [24]:
def fetch_stat():
    """
    Return a list of 4 numbers that represent death, confirmed, investigating and reported numbers respectively.

    """
    statnames = ['Death', 'Confirmed', 'Investigating', 'Reported']
    
    page = requests.get('https://wars.vote4.hk/en/')
    tree = html.fromstring(page.content)

    boxes = tree.xpath('//div[contains(@class, "pages__DailyStatsContainer-sc-6kvjaa-1")]/div')
    res = [int(box.xpath('./p[1]/text()')[0]) for box in boxes]
    
    df = pd.DataFrame(data=[res], columns=statnames)

    return df

In [25]:
fetch_stat()

Unnamed: 0,Death,Confirmed,Investigating,Reported
0,0,15,168,882


In [29]:
import pickle
with open(r'../assets/data/STATS.pkl', 'wb') as f:
    pickle.dump(fetch_stat(), f)