# Import Necessary Libraries

In [85]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Web Scrape

In [2]:
# Hong Kong Observatory "Information of Weather Station" page.
link = "https://www.hko.gov.hk/en/cis/stn.htm"
# The timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(link, timeout=30)
# Fail here on an HTTP error status rather than parsing an error page below.
response.raise_for_status()

In [5]:
# Parse the downloaded page text with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.text, features='html.parser')

In [6]:
# Echo the whole parsed document as the cell output.
soup

<!DOCTYPE html>

<html dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>Information of Weather Station｜Hong Kong Observatory(HKO)｜Climate Information Service</title>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<meta content="Information of Weather Station" name="keywords"/>
<meta content="Information of Weather Station" name="description"/>
<meta content="2018123100000007346" name="recordId"/>
<meta content="en" name="lang"/>
<meta content="468280339992518" property="fb:app_id">
<meta content="website" property="og:type">
<meta content="Information of Weather Station" property="og:title">
<meta content="Back Manned Weather Station(Station Code)(Date of first operation) Location Elevation of ground above mean sea-level (metres) Meteorological Element LatitudeN LongitudeE Wind Temp Wet-bulbTemp Dew PointTemp Relative Humidity Pressure Rainfall Cloud" property="og:description">
<meta content="" pr

# Parse HTML

In [149]:
# Take every <td> that is the first <td> of its parent, skipping the very
# first match. Keep only the first line of each cell's text and remove any
# leading/trailing "*" characters.
region = [
    cell.get_text(separator="\n").partition("\n")[0].strip("*")
    for cell in soup.select("td:nth-of-type(1)")[1:]
]

In [133]:
# Raw text of every <td> that is the second <td> of its parent.
latitude = [
    cell.get_text()
    for cell in soup.select("td:nth-of-type(2)")
]

In [134]:
# Raw text of every <td> that is the third <td> of its parent.
longitude = [
    cell.get_text()
    for cell in soup.select("td:nth-of-type(3)")
]

# Store in DataFrame

In [233]:
# Column name -> scraped list; the three lists must have equal length for the
# DataFrame constructor in the next cell.
dict_gcs = dict(
    region=region,
    latitude_raw=latitude,
    longitude_raw=longitude,
)

In [234]:
# Build the frame, drop exact duplicate rows, renumber the index from 0 and
# keep at most the first 80 rows.
# NOTE(review): the 80-row cap is a magic number, presumably meant to cut off
# trailing non-station rows - confirm against the page.
# .copy() detaches the result from the sliced intermediate frame so that the
# column assignments in later cells do not raise SettingWithCopyWarning.
df_gcs = (
    pd.DataFrame(dict_gcs)
    .drop_duplicates()
    .reset_index(drop=True)
    .iloc[:80]
    .copy()
)

In [235]:
# Show the frame with the raw degree/minute/second strings.
df_gcs

Unnamed: 0,region,latitude_raw,longitude_raw
0,Hong Kong Observatory,"22°18'07""","114°10'27"""
1,Hong Kong International Airport,"22°18'34""","113°55'19"""
2,Beas River,"22°29'36""","114°06'18"""
3,Bluff Head,"22°11'51""","114°12'43"""
4,Central Pier,"22°17'20""","114°09'21"""
...,...,...,...
75,Tai Po Wong Shiu Chi Secondary School,"22°26'44""","114°10'18"""
76,Tap Shek Kok,"22°22'45""","113°55'12"""
77,Tsim Bei Tsui,"22°29'11""","114°00'42"""
78,Tuen Mun Reservior,"22°24'27""","113°59'14"""


# Angle Conversion

In [241]:
def angle_conversion(angle):
    """Convert a degrees/minutes/seconds string to decimal degrees.

    Each component is the number written in front of its sign (``°``, ``'``
    and ``"`` in that order); a component whose sign is absent counts as 0.
    Components may have any number of digits and may be fractional
    (``22°18'07.5"``). A leading ``-`` negates the whole angle, so
    ``-22°30'`` is -22.5, and a leading ``+`` is accepted. Text after the
    last sign is ignored.

    Parameters
    ----------
    angle : str
        Text such as ``22°18'07"``.

    Returns
    -------
    float
        ``degrees + minutes / 60 + seconds / 3600``; 0 when none of the three
        signs is present.

    Raises
    ------
    TypeError
        If ``angle`` is not a string.
    ValueError
        If the text in front of a sign is not a number.
    """
    if not isinstance(angle, str):
        raise TypeError(
            "angle must be a string, got " + type(angle).__name__
        )

    degree_sign = "°"
    minute_sign = "'"
    second_sign = '"'

    remainder = angle.strip()
    # Remember the sign separately: it applies to the whole angle, not only to
    # the degree component (otherwise minutes/seconds would be added the wrong way).
    is_negative = remainder.startswith("-")
    if remainder[:1] in ("-", "+"):
        remainder = remainder[1:]

    components = []
    for sign in (degree_sign, minute_sign, second_sign):
        if sign in remainder:
            # Everything before the sign is this component; the rest is kept
            # for the next sign. No fixed width is assumed.
            value, _, remainder = remainder.partition(sign)
            components.append(float(value))
        else:
            components.append(0.0)

    degree, minute, second = components
    degree_total = degree + minute / 60 + second / 3600
    return -degree_total if is_negative else degree_total

In [238]:
# Decimal-degree latitude computed element-wise from the raw string column.
df_gcs['latitude'] = df_gcs['latitude_raw'].map(angle_conversion)

In [243]:
# Decimal-degree longitude computed element-wise from the raw string column.
df_gcs['longitude'] = df_gcs['longitude_raw'].map(angle_conversion)

In [246]:
# Keep only the station name and the converted coordinates; the raw string
# columns are left out.
df_gcs_final = df_gcs.loc[:, ['region', 'latitude', 'longitude']]

In [247]:
# Show the frame with the decimal-degree coordinates.
df_gcs_final

Unnamed: 0,region,latitude,longitude
0,Hong Kong Observatory,22.301944,114.174167
1,Hong Kong International Airport,22.309444,113.921944
2,Beas River,22.493333,114.105000
3,Bluff Head,22.197500,114.211944
4,Central Pier,22.288889,114.155833
...,...,...,...
75,Tai Po Wong Shiu Chi Secondary School,22.445556,114.171667
76,Tap Shek Kok,22.379167,113.920000
77,Tsim Bei Tsui,22.486389,114.011667
78,Tuen Mun Reservior,22.407500,113.987222


In [248]:
# Write the result to gcs.csv in the working directory. The row index is
# written too (pandas default), as the first, unnamed column.
df_gcs_final.to_csv(path_or_buf="gcs.csv")