# Categorise Skyrise Buildings

### Loading Libraries

In [1]:
import geopandas as gpd
import pandas as pd
import re

from utility import export_df_to_shapefile

### Loading Skyrise Greenery Dataset

In [2]:
gdf_skyrise_greenery = gpd.read_file("../data/skyrise_greenery")
gdf_skyrise_greenery["name"] = gdf_skyrise_greenery["name"].str.lower()
df_skyrise_greenery = gdf_skyrise_greenery[["post_code", "address", "name", "type"]].copy()
df_skyrise_greenery.sample(5)

Unnamed: 0,post_code,address,name,type
335,790434,434 Fernvale Road Singapore 790434,sengkang n4c16 (p)-d&b,HDB
285,310480,Lorong 6 Toa Payoh Singapore 310480,hdb hub @ toa payoh,Retail
52,229540,10 Claymore Road Singapore 229540,pan pacific orchard,Hotel
111,48581,16 Raffles Quay Singapore 048581,hong leong building,Commercial
152,820169,169 Punggol Field Singapore 820169 (The Nautil...,punggol east c24a (s),HDB


In [3]:
df_school = df_skyrise_greenery.query("type == 'Education'")

### Loading External Datasets

- Hotel Listing Dataset (Hotel Licensing Board)
- Mall Locations Dataset (Manually Scraped)

In [4]:
df_hotels = pd.read_csv("../data/hotel_listing.csv")
df_shopping_malls = pd.read_csv("../data/singapore_mall_locations.csv")


In [7]:
df_hotels.sample(10)

Unnamed: 0,Accomodation Name,Hotel Keeper,Postal Code,Address,Hotel Tel No,Email Address,Hotel Fax No,Hotel Url,Status
534,"Oasia Hotel Novena, Singapore",Kenny Yeo,307470,8 Sinaran Drive,6566640333,frontofc.ohs@fareast.com.sg,,www.stayfareast.com,Active
742,V Hotel Bencoolen,Ng Lip Sek,189627,48 Bencoolen Street,6563882233,vhotel@vhotel.sg,,www.vhotel.sg,Active
27,Aliwal Park Hotel,Wong Lee Peng,199948,77 Aliwal Street,6562939022,aliwalpark77@gmail.com,,,Active
654,South East Asia Hotel,Huang Wen Hwa,187965,190 Waterloo Street,6563382394,seahotel@singnet.com.sg,,www.seahotel.com.sg,Active
517,New Cape Inn,Low Siew Voon,168891,3 Seng Poh Road,6565369939,newcapeinn@gmail.com,,www.capeinn-singapore.com,Ceased
688,The Claremont Hotel Singapore,Harpreet Kaur Bedi,218224,301 SERANGOON ROAD,6563923933,,,,Ceased
73,Bay Hotel Singapore,Philip Cyril Raj,98828,50 Telok Blangah Road,6568186666,enquiries@bayhotelsingapore.com,,www.bayhotelsingapore.com,Ceased
535,Oasia Resort Sentosa,Choo Hsiu Lien,98679,23 BEACH VIEW,6597331382,,,,Active
758,Vintage Inn@Clarke Quay,GOH KUAN KEAT,59672,33 HONGKONG STREET,6565322168,,,,Active
285,Hotel 81 - Cosy,Chu Poh Yong,89263,8 Jiak Chuan Road,6563258181,hotel81@hotel81.com.sg,,www.hotel81.com.sg,Active


In [8]:
df_shopping_malls.sample(10)

Unnamed: 0.1,Unnamed: 0,mall_names,latitude,longitude,address,postal_code,cleaned_mall_names
19,19,Holland Village Shopping Mall,1.310449,103.793522,"211 Holland Ave, Singapore 278967",278967,holland village shopping mall
33,33,Orchard Gateway,1.300464,103.837168,"277 Orchard Rd, Singapore 238858",238858,orchard gateway
145,145,Jem,1.3335,103.741529,"50 Jurong Gateway Rd, Singapore 608549",608549,jem
63,63,Bedok Mall,1.324402,103.927123,"SG, New Upper Changi Rd, 311, 467360",467360,bedok mall
56,56,The Poiz,1.331471,103.866146,"51 Upper Serangoon Rd, Singapore 347697",347697,the poiz
105,105,Woodlands North Plaza,1.442673,103.78855,"883 Woodlands Street 82, Singapore 730883",730883,woodlands north plaza
61,61,Wisma Atria,1.30372,103.831081,"435 Orchard Rd, Singapore 238877",238877,wisma atria
44,44,Sim Lim Square,1.303049,103.850737,"1 Rochor Canal Rd, Singapore 188504",188504,sim lim square
2,2,Aperia,1.310463,103.861984,"12 Kallang Ave, Singapore 339511",339511,aperia
59,59,Velocity@Novena Square,1.32001,103.84173,"238 Thomson Rd, Singapore 307683",307683,velocity@novena square


In [18]:
# clean dataset
df_shopping_malls["cleaned_mall_names"] = df_shopping_malls["mall_names"].str.lower()
df_shopping_malls["postal_code"] = df_shopping_malls["postal_code"].apply(
    lambda x: re.sub("\s+", "", x)
)
df_shopping_malls["postal_code"] = df_shopping_malls["postal_code"].apply(
    lambda x: f"{'0' * (6 - len(x))}{x}" if len(x) == 5 else x
)

df_hotels["cleaned_name"] = df_hotels["Accomodation Name"].str.lower()
df_hotels["postal_code"] = df_hotels["Postal Code"].astype(str)
df_hotels["postal_code"] = df_hotels["postal_code"].apply(
    lambda x: f"{'0' * (6 - len(x))}{x}" if len(x) == 5 else x
)

### Group into the below Categories

- Retail (Shopping Malls, Shophouses)
- Leisure (Tourist Attractions, Entertainment, Fitness areas, Restaurants)
- Communal (Community Centers, Libraries, Security Posts, Residents' Committee, Church or Temples)
- Healthcare (Hospitals/Clinics)
- Education (Schools, Hostels, Special Needs Centres)
- Commercial (Offices, Official Government Services)
- Private Residential
- HDB
- MSCP

In [13]:
def assign_category(postal_code, building_name):
    # ---------------------- Healthcare ----------------------
    list_of_healthcare_keywords = [
        "hospital",
        "hospital/medical centre",
        "polyclinic"
    ]
    for x in list_of_healthcare_keywords:
        if re.search(x, building_name):
            return "Healthcare"

    # ---------------------- Schools ----------------------
    list_of_school_keywords = [
        "school", "college", "skool",
        "ntu", "smu", "nus", "nie", 
        "singapore institute of management", 
        "polytechnic", "uwcsea", "mdis", "bca academy", "tca hostel trinity", 
        "lycee francais de singapour", "st andrew autism centre"
    ]
    for x in list_of_school_keywords:
        if re.search(x, building_name):
            return "Education"

    # ---------------------- Retail ----------------------
    # Mall and Commercial Centers
    lst_of_mall_keywords = [
        "mall",
        "shopping", 
        "shoppers", 
        "shophouse",
        "novena square", 
        "balestier plaza", 
        "orchard point", 
        "joo chiat complex", 
        "bras basah complex",
        "vivocity",
        "ue square",
        "shaw house",
        "giant building",
        "customs house",
        "psa nursery hawaii landscape"
    ]
    if postal_code in list(df_shopping_malls["postal_code"]) or building_name in list(df_shopping_malls["cleaned_mall_names"]):
        return "Retail"
    for x in lst_of_mall_keywords:
        if re.search(x, building_name):
            return "Retail"

    # ---------------------- Community ----------------------
    list_of_communal_keywords = [
        "residents\'* committee",
        "library", 
        "kampong",
        "coast guard",
        "\s+c\s*c",
        "columbarium",
        "cemetry",
        "temple",
        "church",
        "bible",
        "community",
        "disabled",
        "post office",
        "circle line"
    ]
    for x in list_of_communal_keywords:
        if re.search(x, building_name):
            return "Communal"

    # ---------------------- Leisure ----------------------
    lst_of_leisure_keywords = [
        "safra club mount faber",
        "chinese swimming complex",
        "home team academy",
        "universal studios singapore resorts world sentosa",
        "hortpark visitor centre",
        "singapore flyer",
        "chinese garden",
        "jurong bird park",
        "satay by the bay",
        "jurong central park - mcdonald",
        "aramsa spa @ bishan park",
        "villa halia",
        "sentosa",
        "marina barrage",
        "tanglin club",
        "museum",
        "marina south pier",
        "concert hall",
        "singapore botanic gardens",
        "country club"
    ]
    for x in lst_of_leisure_keywords:
        if re.search(x, building_name):
            return "Leisure"

    # ---------------------- Hotels ----------------------
    if postal_code in list(df_hotels["postal_code"]):
        return "Hotel"
    
    if building_name in list(df_hotels["cleaned_name"]):
        return "Hotel"

    list_of_hotels = ["hotel", "mandarin oriental", "ritz carlton", "fraser place robertson walk"]
    for x in list_of_hotels:
        if re.search(x, building_name):
            return "Hotel"

    # ---------------------- HDB ----------------------
    if building_name == "hdb residential":
        return "HDB"
    list_of_hdb_keywords = [
        "[a-z]\d",
        "teck ghee vista", 
        "treelodge @ punggol", 
        "53a strathmore avenue singapore 143053", 
        "23a telok blangah crescent singapore 091023",
        "block 840a yishun street 81 singapore 761840",
        "820 thomson road singapore 574623",
        "tiong bahru view"
    ]
    for x in list_of_hdb_keywords:
        if re.search(x, building_name):
            return "HDB"
    
    #MSCP
    if "mscp" in building_name:
        return "MSCP"
    
    # ---------------------- Private Residences ----------------------
    list_of_pte_residences_keywords = [
        "liang seah place",
        "craig place",
        "condominium",
        "no. 11 geylang lorong 35 - level 6",
        "claymore point",
        "winsland house i / lanson place"
    ]
    for x in list_of_pte_residences_keywords:
        if re.search(x, building_name):
            return "Private Residential"

    return "Commercial"

In [14]:
df_skyrise_greenery["type"] = df_skyrise_greenery.apply(
    lambda x: assign_category(x["post_code"], x["name"]), axis=1
)

In [15]:
df_skyrise_greenery["type"].value_counts()

HDB                    103
Commercial              76
Education               61
Retail                  41
Hotel                   40
MSCP                    35
Leisure                 20
Communal                16
Healthcare              11
Private Residential      6
Name: type, dtype: int64

### Export to new Shapefile

In [26]:
df_new_skyrise_greenery = pd.merge(gdf_skyrise_greenery, df_skyrise_greenery, on=["post_code", "address", "name"])

In [32]:
df_new_skyrise_greenery["name"] = df_new_skyrise_greenery["name"].apply(
    lambda x: x.title() if x != "mscp" else "MSCP"
)

In [35]:
gdf_new_skyrise_greenery = export_df_to_shapefile(df_new_skyrise_greenery, "skyrise_greenery")

In [37]:
gdf_new_skyrise_greenery.sample(5)

Unnamed: 0,X,Y,POSTAL_COD,GARDENID,ADDRESS,PROJECT_NA,INC_CRC,FMEL_UPD_D,geometry,TYPE
145,30873.080963,32327.5869,208581,96,9 King George's Avenue Singapore 208581,People'S Association Headquarters,B801B314FC2187BE,20160317140030,POINT Z (103.85913 1.30863 0.00000),Commercial
242,36461.258599,42277.0877,820180,189,180 Edgefield Plains Singapore 820180,MSCP,1117EDD772B74984,20160317140030,POINT Z (103.90935 1.39861 0.00000),MSCP
378,32989.380946,41797.9602,790430,473,430 Fernvale Link Singapore 790430,Sengkang N4C18A (S) - Fernvale Ridge,4167CA2128B8B2D5,20160317140030,POINT Z (103.87815 1.39428 0.00000),HDB
122,29306.534879,28463.456400000003,79908,73,81 Anson Road Singapore 079908,M Hotel,C215DEA38A4F0131,20160317140030,POINT Z (103.84506 1.27369 0.00000),Hotel
196,24448.348941,34416.416201,269734,428,661 Bukit Timah Road 269734,Hwa Chong Institution (Main School Building),5B7E5847A95B68C8,20160317140030,POINT Z (103.80141 1.32752 0.00000),Education


In [38]:
gdf_new_skyrise_greenery.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich