In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from geopy.geocoders import Nominatim, GoogleV3
import numpy as np
# from googlemaps import GoogleMaps
import time
import folium
import pickle

In [2]:
# Download the vote4.hk high-risk listing and parse the returned HTML.
url = r'https://wars.vote4.hk/en/high-risk'
# requests applies no timeout by default; set one so the cell cannot hang
# indefinitely if the server never responds.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()
soup = BeautifulSoup(page.content, 'html.parser')

In [3]:
# Class attribute of the <span> elements whose text is used as an address below.
address_span_class = r'MuiTypography-root MuiTypography-h6 MuiTypography-colorTextPrimary'
job_elems = soup.find_all('span', class_=address_span_class)

In [4]:
# Suffix every scraped address with ', Hong Kong' before it is geocoded.
high_risk_addresses = [f'{span.text}, Hong Kong' for span in job_elems]


In [5]:
# Empty frame with the three columns that the geocoding results are stored under.
coordinate_columns = ['address', 'latitude', 'longitude']
coordinates_df = pd.DataFrame(columns=coordinate_columns)

In [6]:
def pop_address(address):
    """Drop the leading comma-separated component of ``address``.

    Returns everything after the first comma (any space following the comma
    is kept), or an empty string when ``address`` contains no comma.
    """
    return address.partition(',')[2]


def _geocode_with_retry(geocoder, query, max_trials, pause):
    """Ask ``geocoder`` for ``query`` up to ``max_trials`` times; return the first hit or None."""
    for _ in range(max_trials):
        try:
            location = geocoder.geocode(query)
        except Exception:
            # Best-effort lookup: a failed request counts as "no result" and is
            # retried. Catching Exception (not a bare except) lets
            # KeyboardInterrupt / SystemExit still stop a long-running loop.
            location = None
        # Pause after every request, successful or not, to throttle the
        # request rate against the geocoding service.
        time.sleep(pause)
        if location is not None:
            return location
    return None


def get_coordinates(address, max_trials=5, pause=1, geocoder=None):
    """Geocode ``address``, falling back to progressively shorter addresses.

    The full address is tried first. If it cannot be resolved, the leading
    comma-separated component is removed (see ``pop_address``) and the lookup
    is repeated, for at most two simplifications. Queries that become empty
    after simplification are skipped rather than sent to the service.

    Parameters
    ----------
    address : str
        Comma-separated address, most specific component first.
    max_trials : int, optional
        Maximum number of attempts per query (default 5).
    pause : float, optional
        Seconds to sleep after each request (default 1).
    geocoder : object, optional
        Any object exposing ``geocode(query)`` whose result has ``latitude``
        and ``longitude`` attributes. Defaults to the module-level
        ``geolocator``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the first successful lookup, or
        ``(None, None)`` if every query failed.
    """
    if geocoder is None:
        # Resolved at call time, so the notebook may define `geolocator` in a
        # later cell than this function.
        geocoder = geolocator

    query = address
    # One pass for the original address plus two simplified fallbacks.
    for _ in range(3):
        if not query.strip():
            # Nothing left to look up; further simplification stays empty.
            break
        location = _geocode_with_retry(geocoder, query, max_trials, pause)
        if location is not None:
            return (location.latitude, location.longitude)
        query = pop_address(query)
    return None, None

In [7]:
# Create the Nominatim client (get_coordinates reads the module-level
# `geolocator`) and geocode 'Hong Kong' itself to get a centre point for the maps.
address = 'Hong Kong'
geolocator = Nominatim(user_agent='hk_explorer')
location = geolocator.geocode(address)
# geocode() gives None when nothing matches; stop with a clear message rather
# than an AttributeError on the attribute access below.
if location is None:
    raise RuntimeError(f'Could not geocode reference address {address!r}')
hk_latitude = location.latitude
hk_longitude = location.longitude


In [8]:
# Geocode every scraped address and assemble the results in one step.
# Rows are collected in a plain list and the frame is built once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call. Rebuilding from scratch also makes the cell safe to re-run
# without accumulating duplicate rows.
coordinate_records = []
for address in high_risk_addresses:
    latitude, longitude = get_coordinates(address)
    coordinate_records.append({
        'address': address,
        'latitude': latitude,
        'longitude': longitude
    })

# Explicit columns keep the schema stable even when no address was scraped.
coordinates_df = pd.DataFrame(
    coordinate_records,
    columns=['address', 'latitude', 'longitude']
)

In [9]:
# Write the geocoded frame to disk as a pickle.
pickle_path = r'../assets/coordinates_df.pkl'
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(coordinates_df, pickle_file)

In [10]:
# Read the geocoded frame back from its pickle file.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# load pickles that come from a trusted source.
saved_path = r'../assets/coordinates_df.pkl'
with open(saved_path, 'rb') as saved_file:
    coordinates_df = pickle.load(saved_file)

## Simple Visualization using Folium

In [11]:
# Base map centred on the geocoded Hong Kong reference point.
# NOTE(review): the built-in 'stamentoner' tile alias may not be available in
# newer folium releases — confirm against the installed version.
map_hk = folium.Map(location=[hk_latitude, hk_longitude], zoom_start=9, tiles='stamentoner')

# One circle marker per address that was successfully geocoded.
for idx, row in coordinates_df.iterrows():
    address = row['address']
    latitude = row['latitude']
    longitude = row['longitude']
    # Failed lookups are stored as None or NaN depending on the column dtype;
    # pd.notna handles both, whereas np.isnan raises TypeError on None.
    if pd.notna(latitude) and pd.notna(longitude):
        folium.CircleMarker(
            [latitude, longitude],
            radius=5,
            popup=address,
            color='red',
            fill=True,
            fill_color='#3186cc',
            fill_opacity=0.7,
            parse_html=False
        ).add_to(map_hk)

In [12]:
# Bare expression on the last line so the notebook renders the map as the cell output.
map_hk

## Plotly Visualization

In [15]:

# Load the Mapbox access token, which is stored as a pickled object.
# NOTE(review): pickle.load can execute arbitrary code from the file; a
# plain-text file or an environment variable would be a safer store for a token.
token_path = r'../assets/.mapbox_token'
with open(token_path, 'rb') as token_file:
    token = pickle.load(token_file)

In [41]:
import plotly.express as px

# Register the Mapbox access token with plotly express before drawing the map.
px.set_mapbox_access_token(token)

# One point per row of coordinates_df; hovering a point shows its address.
fig = px.scatter_mapbox(
    coordinates_df,
    lat="latitude",
    lon="longitude",
    hover_name='address',
    zoom=9,
    title=r'Coronavirus High Risk Areas')
fig.show()