In [36]:
import pickle
import time

import folium
import numpy as np
import pandas as pd
import plotly.express as px
import requests
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim, GoogleV3
# from googlemaps import GoogleMaps

In [37]:
# Source page listing the high-risk locations, and the map centre used by the
# Folium visualisation further down.
url = r'https://wars.vote4.hk/en/high-risk'
hk_latitude, hk_longitude = 22.2793278, 114.1628131

# Without a timeout a stalled connection would hang the cell indefinitely.
page = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of silently parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

In [38]:
# The address headings are located through the class string of their <span>
# elements; the text of each match is used as an address in a later cell.
address_span_class = r'MuiTypography-root MuiTypography-h6 MuiTypography-colorTextPrimary'
html_addresses = soup.find_all('span', class_=address_span_class)

# html_descriptions = soup.find_all(
#     'div',
#     class_=r'MuiBox-root jss572 high-risk__HighRiskCard-o7qs9y-0 gdBVup'
# )



In [39]:
# Suffix every scraped address with ', Hong Kong' (presumably to disambiguate
# the geocoder queries made later on).
high_risk_addresses = [f'{span.text}, Hong Kong' for span in html_addresses]
# high_risk_descriptions = [job_elem.text for job_elem in html_descriptions]

In [40]:
# Show the scraped address list as the cell output for a quick sanity check.
high_risk_addresses

['West Kowloon Station, Hong Kong',
 'Luk Kwai House Kwai Chung Estate, Hong Kong',
 'Lo Wu Customs Checkpoint, Hong Kong',
 'Kai Tak Cruise Terminal, Hong Kong',
 '"Enhanted Princess" Cruise, Hong Kong',
 'Block 1, Site 11, Whampoa Garden, Hong Kong',
 'Wah Cheung House, Ching Wah Court, Hong Kong',
 'Tower 2, The Coronation, Hong Kong',
 'Cheung Hong Estate, Hong Mei House, Hong Kong',
 'Four Seasons Hotel Hong Kong, Hong Kong',
 'W Hong Kong, Hong Kong',
 'The Ritz-Carlton, Hong Kong',
 '8/F, Tin Hei House, Tin Ping Estate, Hong Kong',
 'Block 2, Tseung Kwan O Plaza, Hong Kong',
 'Universal Towers, Hong Kong',
 'South Hillcrest, Hong Kong',
 'Tower 1, Lake Silver, Hong Kong',
 'Empire Hotel Kowloon, Tsim Sha Tsui, Hong Kong',
 'Floor 23, Li Po Chun Chambers, Hong Kong',
 '26/F, Tower 1A, Oceanaire, Hong Kong',
 'Alva Hotel by Royal, Hong Kong']

In [41]:
# Restore the previously geocoded addresses from the pickle cache on disk.
# NOTE: pickle can execute arbitrary code on load; only open files this
# notebook wrote itself.
coordinates_cache_path = r'../assets/coordinates_df.pkl'
with open(coordinates_cache_path, mode='rb') as cache_file:
    coordinates_df = pickle.load(cache_file)

In [42]:
def pop_address(address):
    """Drop the leading comma-separated component of ``address``.

    Everything after the first comma is returned unchanged (including any
    leading whitespace). An address without a comma yields an empty string.
    """
    _first_part, _comma, remainder = address.partition(',')
    return remainder
    
def get_coordinates(address, max_retries=5, pause=1, geolocator=None):
    """Geocode ``address``, falling back to progressively shorter addresses.

    The full address is tried first. If it cannot be resolved, the leading
    comma-separated component is removed with ``pop_address`` and the lookup
    is repeated, for at most two simplifications.

    Parameters
    ----------
    address : str
        Address to look up.
    max_retries : int, optional
        Number of requests made per candidate address (default 5).
    pause : float, optional
        Seconds to sleep after every request (default 1).
    geolocator : object, optional
        Any object exposing ``geocode(query)``. Defaults to a ``Nominatim``
        geocoder with the ``'hk_explorer'`` user agent.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the first match, or ``(None, None)``
        when no candidate could be resolved.
    """
    if geolocator is None:
        geolocator = Nominatim(user_agent='hk_explorer')

    query = address
    # Level 0 is the full address; levels 1 and 2 drop leading components.
    for _level in range(3):
        if not query.strip():
            # Nothing left to look up once every component has been removed.
            break
        for _attempt in range(max_retries):
            try:
                location = geolocator.geocode(query)
            except Exception:
                # Best effort: treat a failed request as "no result" and retry.
                # KeyboardInterrupt is not an Exception, so the loop stays
                # interruptible.
                location = None
            # Pause after every request to throttle calls to the service.
            time.sleep(pause)
            if location is not None:
                return (location.latitude, location.longitude)
        query = pop_address(query)

    return None, None

In [43]:
# Geocode only the addresses that are not in the cache yet and add them to the
# frame in a single concat. DataFrame.append was removed in pandas 2.0, and
# growing a frame row by row copies it on every iteration.
known_addresses = set(coordinates_df['address'])
# Continue numbering after the highest existing id; start at 0 for an empty cache.
next_loc_id = 0 if coordinates_df.empty else int(coordinates_df['loc_id'].max()) + 1

new_rows = []
for address in high_risk_addresses:
    if address in known_addresses:
        continue
    latitude, longitude = get_coordinates(address)
    new_rows.append({
        'loc_id': next_loc_id,
        'address': address,
        'latitude': latitude,
        'longitude': longitude
    })
    # Remember the address so a duplicate later in the list is not geocoded twice.
    known_addresses.add(address)
    next_loc_id += 1

if new_rows:
    # get_coordinates returns None for failed lookups; cast so they are stored
    # as NaN and the coordinate columns stay numeric.
    new_rows_df = pd.DataFrame(new_rows).astype({'latitude': float, 'longitude': float})
    coordinates_df = pd.concat([coordinates_df, new_rows_df], ignore_index=True)

In [50]:
# Show the coordinate table as the cell output.
coordinates_df

Unnamed: 0,loc_id,address,latitude,longitude
0,0,"West Kowloon Station, Hong Kong",22.30408,114.166501
1,1,"Block 1, Site 11, Whampoa Garden, Hong Kong",22.305096,114.19061
2,2,"Wah Cheung House, Ching Wah Court, Hong Kong",22.34882,114.10102
3,3,"Tower 2, The Coronation, Hong Kong",22.309601,114.165802
4,4,"Cheung Hong Estate, Hong Mei House, Hong Kong",22.350433,114.099415
5,5,"Four Seasons Hotel Hong Kong, Hong Kong",22.286634,114.1567
6,6,"W Hong Kong, Hong Kong",22.304636,114.160593
7,7,"The Ritz-Carlton, Hong Kong",22.303165,114.160212
8,8,"8/F, Tin Hei House, Tin Ping Estate, Hong Kong",22.503551,114.133889
9,9,"Block 2, Tseung Kwan O Plaza, Hong Kong",22.308929,114.262603


In [61]:
# Write the coordinate table back to the pickle cache on disk.
coordinates_cache_path = r'../assets/coordinates_df.pkl'
with open(coordinates_cache_path, mode='wb') as cache_file:
    pickle.dump(coordinates_df, cache_file)

## Simple Visualization using Folium

In [54]:
map_hk = folium.Map(location=[hk_latitude, hk_longitude], zoom_start=9, tiles='stamentoner')
# folium.TileLayer('stamentoner').add_to(map_hk)

# Addresses that could not be geocoded have missing coordinates (NaN or None).
# dropna handles both, whereas np.isnan raises TypeError on None.
located_df = coordinates_df.dropna(subset=['latitude', 'longitude'])

# One circle marker per geocoded address, with the address as its popup.
for address, latitude, longitude in zip(
    located_df['address'], located_df['latitude'], located_df['longitude']
):
    folium.CircleMarker(
        [latitude, longitude],
        radius=5,
        popup=address,
        color='red',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False
    ).add_to(map_hk)

In [55]:
# Render the Folium map inline as the cell output.
map_hk

## Plotly Visualization

In [56]:
# Read the Mapbox access token, which is stored as a pickled object rather
# than hard-coded in the notebook.
mapbox_token_path = r'../assets/.mapbox_token'
with open(mapbox_token_path, mode='rb') as token_file:
    token = pickle.load(token_file)

In [60]:
# plotly.express is imported as px in the notebook's top import cell.
px.set_mapbox_access_token(token)

# Every point is given the same size value; size_max then caps the marker size.
fig = px.scatter_mapbox(
    coordinates_df,
    lat="latitude",
    lon="longitude",
    hover_name='address',
    zoom=10,
    title=r'Coronavirus High Risk Areas',
    size=[1] * len(coordinates_df),
    size_max=6,
    height=900
)
fig.show()