# Convert MSS Station Locations into Shapefile

Url of PDF with Meteorological Service Stations Information: ```http://www.weather.gov.sg/wp-content/uploads/2022/06/Station_Records.pdf```

### Loading Libraries

In [1]:
import pandas as pd
import geopandas as gpd
import tabula
from utility import export_df_to_shapefile

import warnings
warnings.filterwarnings('ignore')

### Use Tabula to Convert Tables in PDF into DataFrame

In [2]:
# 3 Pages in Total in PDF
all_tables = tabula.read_pdf("../data/Station_Records.pdf", pages="1-3")

#### Page 1

In [3]:
page_1 = all_tables[0]

In [4]:
page_1

Unnamed: 0.1,Unnamed: 0,Position,Period of Daily,Period of,Period of Mean,Period of Max and,Period of Mean.1,Period of Max
0,,,Rain Records,"30,60,120-Min Rain",Temperature,Min Temperature,Wind Speed,Wind Speed
1,,,,Records,,,,
2,Station,Lat.(N) Long. (E),,,,,,
3,Paya Lebar,1.3524 103.9007,Jan 1980-current,-,Sep 2017-current,Jan 1981-current,Jan 1981- current,Jan 2010-current
4,Tengah,1.3858 103.7114,Jan 1980-current,-,Aug 1986-current,Jan 1985-current,Jan 1985-current,Jan 2010-current
5,Changi,1.3678 103.9826,Jan 1981-current,Jan 2014-current,Jan 1982-current,Jan 1982-current,Jan 1983-current,Jan 1983-current
6,Seletar,1.4166 103.8654,Jan 1980-current,-,Aug 1986-current,Jan 1985-current,Jan 1985-current,Jan 2010-current
7,Tai Seng,1.3399 103.8878,Jan 1980-current,Jan 2014-current,May 2011-current,May 2011-current,May 2011-current,May 2011-current
8,Jurong (West),1.3455 103.6806,Jan 1980-current,Jan 2014-current,Mar 2009-current,Mar 2009-current,Mar 2009-current,Mar 2009-current
9,Ang Mo Kio,1.3764 103.8492,Sep 2009-current,Jan 2014-current,Sep 2009-current,Sep 2009-current,Sep 2009-current,Sep 2009-current


Problematic Entries: 
- East Coast (index 17) --> use index 18's Coordinates instead [actual name: East Coast Parkway]
- Choa Chu Kang (index 24) --> use index 25's Coordinates instead [actual name: Choa Chu Kang (South)]
- Macritchie (index 27) --> use index 28's Coordinates instead [actual name: Macritchie]
- Lower Peirce (index 29) --> use index 30's Coordinates instead [actual name: Lower Peirce Reservoir]

Index to delete:
- 0, 1, 2, 18, 25, 28, 30

In [5]:
#### Fix the Station Name and Geocordinates of Problematic Entries
page_1.loc[17, "Unnamed: 0"] = "East Coast Parkway"
page_1.loc[17, "Position"] = page_1.loc[18, "Position"]

page_1.loc[24, "Unnamed: 0"] = "Choa Chu Kang (South)"
page_1.loc[24, "Position"] = page_1.loc[25, "Position"]

page_1.loc[27, "Unnamed: 0"] = "Macritchie Reservoir"
page_1.loc[27, "Position"] = page_1.loc[28, "Position"]

page_1.loc[29, "Unnamed: 0"] = "Lower Peirce Reservoir"
page_1.loc[29, "Position"] = page_1.loc[30, "Position"]

#### Delete extra rows
page_1.drop([0, 1, 2, 18, 25, 28, 30], axis=0, inplace=True)
page_1.reset_index(drop=True, inplace=True)

#### Subset Station Name and coordinates columns and Rename Columns
page_1 = page_1[["Unnamed: 0", "Position"]].copy()
page_1.rename(columns={"Unnamed: 0" : "station", "Position": "coordinates"}, inplace=True)

In [6]:
page_1

Unnamed: 0,station,coordinates
0,Paya Lebar,1.3524 103.9007
1,Tengah,1.3858 103.7114
2,Changi,1.3678 103.9826
3,Seletar,1.4166 103.8654
4,Tai Seng,1.3399 103.8878
5,Jurong (West),1.3455 103.6806
6,Ang Mo Kio,1.3764 103.8492
7,Clementi,1.3337 103.7768
8,Admiralty,1.4439 103.7854
9,Sentosa Island,1.2500 103.8279


#### Page 2

In [7]:
page_2 = all_tables[1]

In [8]:
page_2

Unnamed: 0.1,Unnamed: 0,Position,Period of Daily,Period of,Period of Mean,Period of Max and,Period of Mean.1,Period of Max
0,,,Rain Records,"30,60,120-Min Rain",Temperature,Min Temperature,Wind Speed,Wind Speed
1,,,,Records,,,,
2,Station,Lat.(N) Long. (E),,,,,,
3,Ulu Pandan,1.3329 103.7556,Jan 1980-current,Jan 2014-current,,,,
4,Serangoon,1.3382 103.8657,Jan 1980-current,Jan 2014-current,,,,
5,Jurong (East),1.3262 103.7354,Jan 1980-Sep 2014,Jan 2014-Sep 2014,,,,
6,Mandai,1.4036 103.7898,Jan 1980-current,Jan 2014-current,,,,
7,Upper Thomson,1.3416 103.8106,Jan 1980-Dec 2020,Jan 2014-Dec 2020,,,,
8,Buangkok,1.3837 103.8860,Jan 1980-Feb 2019,Jan 2014-Feb 2019,,,,
9,Boon Lay (West),1.3275 103.7042,Jan 1980-Dec 2017,Jan 2014-Dec 2017,,,,


Problematic Entries: 
- Upper Peirce (index 17) --> use index 18's Coordinates instead [actual name: Upper Peirce Reservoir]
- Choa Chu Kang (index 31) --> use index 32's Coordinates instead [actual name: Choa Chu Kang (West)]

Index to delete:
- 0, 1, 2, 18, 32

In [9]:
#### Fix the Station Name and Geocordinates of Problematic Entries
page_2.loc[17, "Unnamed: 0"] = "Upper Peirce Reservoir"
page_2.loc[17, "Position"] = page_2.loc[18, "Position"]

page_2.loc[31, "Unnamed: 0"] = "Choa Chu Kang (West)"
page_2.loc[31, "Position"] = page_2.loc[32, "Position"]

#### Delete extra rows
page_2.drop([0, 1, 2, 18, 32], axis=0, inplace=True)
page_2.reset_index(drop=True, inplace=True)

#### Subset Station Name and coordinates columns and Rename Columns
page_2 = page_2[["Unnamed: 0", "Position"]].copy()
page_2.rename(columns={"Unnamed: 0" : "station", "Position": "coordinates"}, inplace=True)

In [10]:
page_2

Unnamed: 0,station,coordinates
0,Ulu Pandan,1.3329 103.7556
1,Serangoon,1.3382 103.8657
2,Jurong (East),1.3262 103.7354
3,Mandai,1.4036 103.7898
4,Upper Thomson,1.3416 103.8106
5,Buangkok,1.3837 103.8860
6,Boon Lay (West),1.3275 103.7042
7,Bukit Panjang,1.3824 103.7603
8,Kranji Reservoir,1.4387 103.7363
9,Tanjong Pagar,1.2740 103.8482


#### Page 3

In [11]:
page_3 = all_tables[2]
page_3

Unnamed: 0.1,Unnamed: 0,Position,Period of Daily,Period of,Period of Mean,Period of Max and,Period of Mean.1,Period of Max
0,,,Rain Records,"30,60,120-Min Rain",Temperature,Min Temperature,Wind Speed,Wind Speed
1,,,,Records,,,,
2,Station,Lat.(N) Long. (E),,,,,,
3,Serangoon North,1.3606 103.8697,Jun 2009-Apr 2017,Jan 2014-Apr 2017,,,,
4,Lim Chu Kang,1.4385 103.7013,Jan 2010-current,Jan 2014-current,,,,
5,Marine Parade,1.3065 103.9107,May 2009-current,Jan 2014-current,,,,
6,Choa Chu Kang,,,,,,,
7,(Central),1.3819 103.7386,Dec 2009-current,Jan 2014-current,,,,
8,Dhoby Ghaut,1.2994 103.8461,Sep 2010-current,Jan 2014-current,,,,
9,Nicoll Highway,1.3011 103.8666,Sep 2010-current,Jan 2014-current,,,,


Problematic Entries: 
- Choa Chu Kang (index 6) --> use index 7's Coordinates instead [actual name: Choa Chu Kang (Central)]

Index to delete:
- 0, 1, 2, 7

In [12]:
#### Fix the Station Name and Geocordinates of Problematic Entries
page_3.loc[6, "Unnamed: 0"] = "Choa Chu Kang"
page_3.loc[6, "Position"] = page_3.loc[7, "Position"]

#### Delete extra rows
page_3.drop([0, 1, 2, 7], axis=0, inplace=True)
page_3.reset_index(drop=True, inplace=True)

#### Subset Station Name and coordinates columns and Rename Columns
page_3 = page_3[["Unnamed: 0", "Position"]].copy()
page_3.rename(columns={"Unnamed: 0" : "station", "Position": "coordinates"}, inplace=True)

In [13]:
page_3

Unnamed: 0,station,coordinates
0,Serangoon North,1.3606 103.8697
1,Lim Chu Kang,1.4385 103.7013
2,Marine Parade,1.3065 103.9107
3,Choa Chu Kang,1.3819 103.7386
4,Dhoby Ghaut,1.2994 103.8461
5,Nicoll Highway,1.3011 103.8666
6,Botanic Garden,1.3087 103.8180
7,Whampoa,1.3214 103.8577


### Combine 3 Tables and Create Coordinates Columns

In [14]:
df = pd.concat([page_1, page_2, page_3])
df.reset_index(drop=True, inplace=True)
df

Unnamed: 0,station,coordinates
0,Paya Lebar,1.3524 103.9007
1,Tengah,1.3858 103.7114
2,Changi,1.3678 103.9826
3,Seletar,1.4166 103.8654
4,Tai Seng,1.3399 103.8878
...,...,...
58,Choa Chu Kang,1.3819 103.7386
59,Dhoby Ghaut,1.2994 103.8461
60,Nicoll Highway,1.3011 103.8666
61,Botanic Garden,1.3087 103.8180


In [15]:
df["latitude"] = df["coordinates"].apply(lambda x: x.split(" ")[0])
df["longitude"] = df["coordinates"].apply(lambda x: x.split(" ")[1])
df.drop(columns=["coordinates"], inplace=True)

df

Unnamed: 0,station,latitude,longitude
0,Paya Lebar,1.3524,103.9007
1,Tengah,1.3858,103.7114
2,Changi,1.3678,103.9826
3,Seletar,1.4166,103.8654
4,Tai Seng,1.3399,103.8878
...,...,...,...
58,Choa Chu Kang,1.3819,103.7386
59,Dhoby Ghaut,1.2994,103.8461
60,Nicoll Highway,1.3011,103.8666
61,Botanic Garden,1.3087,103.8180


### Export at Shapefile

In [21]:
gdf = gpd.GeoDataFrame(df, geometry= gpd.points_from_xy(df['longitude'], df['latitude']), crs='epsg:4326')
gdf.drop(columns=["latitude", "longitude"], inplace=True)
gdf

Unnamed: 0,station,geometry
0,Paya Lebar,POINT (103.90070 1.35240)
1,Tengah,POINT (103.71140 1.38580)
2,Changi,POINT (103.98260 1.36780)
3,Seletar,POINT (103.86540 1.41660)
4,Tai Seng,POINT (103.88780 1.33990)
...,...,...
58,Choa Chu Kang,POINT (103.73860 1.38190)
59,Dhoby Ghaut,POINT (103.84610 1.29940)
60,Nicoll Highway,POINT (103.86660 1.30110)
61,Botanic Garden,POINT (103.81800 1.30870)


In [22]:
export_df_to_shapefile(gdf, "weather_stations")

Unnamed: 0,station,geometry
0,Paya Lebar,POINT (103.90070 1.35240)
1,Tengah,POINT (103.71140 1.38580)
2,Changi,POINT (103.98260 1.36780)
3,Seletar,POINT (103.86540 1.41660)
4,Tai Seng,POINT (103.88780 1.33990)
...,...,...
58,Choa Chu Kang,POINT (103.73860 1.38190)
59,Dhoby Ghaut,POINT (103.84610 1.29940)
60,Nicoll Highway,POINT (103.86660 1.30110)
61,Botanic Garden,POINT (103.81800 1.30870)
