# Import Necessary Libraries

In [1]:
# data manipulation
import pandas as pd
# web scraping
import requests
from bs4 import BeautifulSoup

# Web Scrape

In [2]:
# Page that lists each region together with its latitude and longitude
link = "https://www.hko.gov.hk/en/cis/stn.htm"
# a timeout keeps this cell from hanging forever if the server never answers
response = requests.get(link, timeout=30)
# fail here on an HTTP error status (4xx/5xx) rather than parsing an error page later
response.raise_for_status()

# Parse html

In [3]:
# turn the downloaded page text into a searchable tree with the "html.parser" backend
soup = BeautifulSoup(markup=response.text, features="html.parser")

In [4]:
# Region names: for every <td> that is the first <td> of its parent, keep only
# the first line of its text and drop any leading/trailing "*" characters.
# The first matched cell is skipped ([1:]) -- presumably it is not a data row;
# NOTE(review): confirm against the page, the other two columns are not offset.
region = [
    cell.get_text(separator="\n").partition("\n")[0].strip("*")
    for cell in soup.select("td:nth-of-type(1)")[1:]
]

In [5]:
# Raw latitude text: one entry per <td> that is the second <td> of its parent
latitude = [cell.get_text() for cell in soup.select("td:nth-of-type(2)")]

In [6]:
# Raw longitude text: one entry per <td> that is the third <td> of its parent
longitude = [cell.get_text() for cell in soup.select("td:nth-of-type(3)")]

# Store in DataFrame

In [7]:
# gather the three scraped lists under the column names used by the dataframe
dict_gcs = dict(region=region, latitude_raw=latitude, longitude_raw=longitude)

In [8]:
# Build the table, drop repeated rows, renumber the index from 0 and keep at
# most the first 80 rows.
# NOTE(review): 80 is a hard-coded cap -- presumably the number of wanted rows
# on the page at scrape time; confirm before reusing.
df_gcs = pd.DataFrame(dict_gcs).drop_duplicates(ignore_index=True).head(80)

# Angle Conversion

In [9]:
def angle_conversion(angle):
    '''
    Convert an angle in degree/minute/second notation to decimal degrees.

    Output: angle in degree with decimal (float)
    Input: angle in degree, minute and second, given as a string
    Note: identify degree, minute and second by its sign
          (° after the degree, ' after the minute, " after the second);
          a part whose sign is missing counts as 0.
          Parts may be fractional and of any width, and a leading "-"
          makes the whole angle negative.
    Raises: ValueError when the text in front of a sign is not a number
    '''
    degree_sign = "°"
    minute_sign = "'"
    second_sign = '"'

    # Walk through the string left to right: everything in front of a sign is
    # that part's number, everything after it is left for the next part.
    # (Slicing a fixed two characters would misread values such as 07.5)
    remainder = angle
    parts = []
    for sign in (degree_sign, minute_sign, second_sign):
        if sign in remainder:
            number_text, _, remainder = remainder.partition(sign)
            # magnitude only; the direction is applied once at the end
            parts.append(abs(float(number_text)))
        else:
            parts.append(0.0)
    degree, minute, second = parts

    degree_total = degree + minute / 60 + second / 3600

    # a minus sign applies to the whole angle, not just to the degree part
    if angle.lstrip().startswith("-"):
        degree_total = -degree_total
    return degree_total

In [10]:
# add a 'latitude' column holding each raw latitude string as decimal degrees
df_gcs['latitude'] = df_gcs['latitude_raw'].map(angle_conversion)

In [11]:
# add a 'longitude' column holding each raw longitude string as decimal degrees
df_gcs['longitude'] = df_gcs['longitude_raw'].map(angle_conversion)

In [12]:
# keep all rows but only the region name and the two decimal-degree columns
df_gcs_final = df_gcs.loc[:, ['region', 'latitude', 'longitude']]

In [13]:
# show the final table as this cell's output
df_gcs_final

Unnamed: 0,region,latitude,longitude
0,Hong Kong Observatory,22.301944,114.174167
1,Hong Kong International Airport,22.309444,113.921944
2,Beas River,22.493333,114.105000
3,Bluff Head,22.197500,114.211944
4,Central Pier,22.288889,114.155833
...,...,...,...
75,Tai Po Wong Shiu Chi Secondary School,22.445556,114.171667
76,Tap Shek Kok,22.379167,113.920000
77,Tsim Bei Tsui,22.486389,114.011667
78,Tuen Mun Reservior,22.407500,113.987222


# Save as csv

In [14]:
# Write the final table to gcs.csv in the current working directory.
# NOTE(review): the row index is written as an unnamed first column (pandas
# default, made explicit here); pass index=False if consumers do not need it.
df_gcs_final.to_csv(path_or_buf="gcs.csv", index=True)