# Convert Skyrise Greenery KML to Shapefile

This notebook converts the Skyrise Greenery KML file into a Shapefile, using regular expressions to extract the attributes embedded in each feature's HTML description.

### Loading Libraries

In [1]:
import geopandas as gpd
from fiona.drvsupport import supported_drivers
import re
import pandas as pd
from utility import export_df_to_shapefile

### Load Raw Dataset

In [2]:
# Register the KML driver with Fiona in read/write ("rw") mode before asking
# GeoPandas to open the file with that driver.
supported_drivers.update({"KML": "rw"})
df = gpd.read_file(
    "../data/nparks-skyrise-greenery-kml.kml",
    driver="KML",
)

The `Description` column stores each feature's attributes as an HTML table.

In [3]:
# Preview the first rows of the raw frame (Name, Description, geometry).
df.head()

Unnamed: 0,Name,Description,geometry
0,kml_1,<center><table><tr><th colspan='2' align='cent...,POINT Z (103.86007 1.29945 0.00000)
1,kml_2,<center><table><tr><th colspan='2' align='cent...,POINT Z (103.85561 1.31063 0.00000)
2,kml_3,<center><table><tr><th colspan='2' align='cent...,POINT Z (103.85649 1.31139 0.00000)
3,kml_4,<center><table><tr><th colspan='2' align='cent...,POINT Z (103.85796 1.32109 0.00000)
4,kml_5,<center><table><tr><th colspan='2' align='cent...,POINT Z (103.86622 1.32076 0.00000)


In [4]:
# Show the complete, untruncated Description string of the row labelled 0.
df.loc[0, "Description"]

'<center><table><tr><th colspan=\'2\' align=\'center\'><em>Attributes</em></th></tr><tr bgcolor="#E3E3F3"> <th>Y</th> <td>31312.5274</td> </tr><tr bgcolor=""> <th>X</th> <td>30977.219626</td> </tr><tr bgcolor="#E3E3F3"> <th>POSTAL_CODE</th> <td>199591</td> </tr><tr bgcolor=""> <th>GARDENID</th> <td>97</td> </tr><tr bgcolor="#E3E3F3"> <th>ADDRESS</th> <td>7500A Beach Road Singapore 199591</td> </tr><tr bgcolor=""> <th>PROJECT_NAME</th> <td>Parkroyal on Beach Road</td> </tr><tr bgcolor="#E3E3F3"> <th>INC_CRC</th> <td>30DE7C10E49125D2</td> </tr><tr bgcolor=""> <th>FMEL_UPD_D</th> <td>20160317140030</td> </tr></table></center>'

### Transforming the HTML Description using Regular Expressions

Not a very <em>elegant</em> approach, but it works for this dataset.

In [5]:
# One attribute row of the description table looks like
#   <tr bgcolor="..."> <th>KEY</th> <td>VALUE</td> </tr>
# Group 1 captures KEY and group 2 captures VALUE, both without surrounding
# whitespace. KEY may not contain "<", which keeps the title cell
# (<th ...><em>Attributes</em></th>, which is not followed by a <td>) from
# being matched. The pattern is a raw string so that "\s" and "\b" reach the
# regex engine instead of being treated as (invalid) string escapes.
RE_ATTRIBUTE_ROW = re.compile(
    r"<th\b[^>]*>\s*([^<]*?)\s*</th>\s*<td\b[^>]*>\s*(.*?)\s*</td>",
    re.DOTALL,
)


def parse_description(html):
    """Extract the attribute table of one KML description into a dict.

    Parameters
    ----------
    html : str
        The HTML stored in the ``Description`` column of one row.

    Returns
    -------
    dict
        Maps each ``<th>`` label to the text of the ``<td>`` that follows it.
        Values are returned verbatim as strings: commas and colons inside a
        value (e.g. in an address) are preserved rather than stripped or
        used as delimiters. If a label occurs more than once, the last
        occurrence wins.
    """
    return dict(RE_ATTRIBUTE_ROW.findall(html))


# One {Name -> value} dictionary per attribute; the next cell maps these onto
# the frame through the "Name" column.
dct_name_X = {}
dct_name_Y = {}
dct_name_postal_code = {}
dct_name_gardenid = {}
dct_name_address = {}
dct_name_project_name = {}
dct_name_inc_crc = {}
dct_name_fmel_upd_d = {}

# Attribute label in the HTML table -> dictionary that collects its values.
dct_by_attribute = {
    "X": dct_name_X,
    "Y": dct_name_Y,
    "POSTAL_CODE": dct_name_postal_code,
    "GARDENID": dct_name_gardenid,
    "ADDRESS": dct_name_address,
    "PROJECT_NAME": dct_name_project_name,
    "INC_CRC": dct_name_inc_crc,
    "FMEL_UPD_D": dct_name_fmel_upd_d,
}

for _, row in df.iterrows():
    dct_of_attributes = parse_description(row["Description"])
    # A description lacking one of the expected labels raises KeyError here,
    # so incomplete rows are not silently skipped.
    for attribute, dct_name_value in dct_by_attribute.items():
        dct_name_value[row["Name"]] = dct_of_attributes[attribute]

In [6]:
# New column name -> {Name -> value} dictionary built by the parsing cell.
# Insertion order determines the order in which the columns are appended.
lookup_by_column = {
    "X": dct_name_X,
    "Y": dct_name_Y,
    "POSTAL_CODE": dct_name_postal_code,
    "GARDENID": dct_name_gardenid,
    "ADDRESS": dct_name_address,
    "PROJECT_NAME": dct_name_project_name,
    "INC_CRC": dct_name_inc_crc,
    "FMEL_UPD_D": dct_name_fmel_upd_d,
}

# Look each row's Name up in the matching dictionary to fill the column.
for column, lookup in lookup_by_column.items():
    df[column] = df["Name"].map(lookup)

# Name was only the join key and Description has been unpacked into columns,
# so both are removed from the frame (in place).
df.drop(columns=["Name", "Description"], inplace=True)

### Final Form

Note that column names longer than 10 characters will be truncated in a Shapefile

In [7]:
# Hand the cleaned frame to the project helper imported from `utility` and
# keep whatever it returns for inspection below.
# NOTE(review): the helper's source is not shown in this notebook; it
# presumably writes ../data/skyrise_greenery/skyrise_greenery.shp — confirm.
gdf = export_df_to_shapefile(df, "skyrise_greenery")

  gdf.to_file(f"../data/{filename}/{filename}.shp")


In [9]:
# Spot-check five randomly sampled rows of the exported frame.
gdf.sample(5)

Unnamed: 0,geometry,X,Y,POSTAL_CODE,GARDENID,ADDRESS,PROJECT_NAME,INC_CRC,FMEL_UPD_D
63,POINT Z (103.85178 1.36359 0.00000),30055.02873,38404.803100000005,560336,7,336 Ang Mo Kio Avenue 1 Singapore 560336,HDB Residential,3D0AF3A4BB392780,20160317140030
246,POINT Z (103.77395 1.30532 0.00000),21392.704713,31961.4337,138601,193,36 College Avenue East Sinvapore 138601,NUS U-Town Hall of Residence,CEAF2DBCC1BB5935,20160317140030
102,POINT Z (103.85000 1.28492 0.00000),29856.376146,29705.812100000003,48692,53,1 Philip Street Singapore 048692,Lippo Centre,75CEB86F7BE78847,20160317140030
326,POINT Z (103.87654 1.39038 0.00000),32810.498573,41366.4732,790410,309,410 Fernvale Road Singapore 790410,MSCP,6D466AA181386C92,20160317140030
207,POINT Z (103.90304 1.30414 0.00000),35759.461373,31831.2353,428762,442,42 East Coast Road Singapore 428762,MODA Condominium,6BCBC9BE76909267,20160317140030
