# Categorise Skyrise Buildings

### Loading Libraries

In [34]:
import geopandas as gpd
import pandas as pd
import re

from utility import export_df_to_shapefile

### Loading Skyrise Greenery Dataset

In [18]:
gdf_skyrise_greenery = gpd.read_file("../data/skyrise_greenery")
gdf_skyrise_greenery["PROJECT_NA"] = gdf_skyrise_greenery["PROJECT_NA"].str.lower()
df_skyrise_greenery = gdf_skyrise_greenery[["POSTAL_COD", "ADDRESS", "PROJECT_NA"]].copy()
df_skyrise_greenery.sample(5)

Unnamed: 0,POSTAL_COD,ADDRESS,PROJECT_NA
6,207566,153 Tyrwhitt Road Singapore 207566,qinan building
356,440077,77A Marine Drive Singapore 440077,block 77a marine drive (mscp)
107,88934,3 Lim Teck Kim Road Singapore 088934,singapore technologies building
34,238884,583 Orchard Road Singapore 238884,forum the shopping mall
47,239693,1A Grange Road Singapore 239693,traders hotel singapore


### Loading External Datasets

- Hotel Listing Dataset (Hotel Licensing Board)
- Mall Locations Dataset (Manually Scraped)

In [19]:
df_hotels = pd.read_csv("../data/HotelListing.csv")
df_shopping_malls = pd.read_csv("../data/singapore_mall_locations.csv")


In [20]:
df_hotels.sample(3)

Unnamed: 0,Accomodation Name,Hotel Keeper,Postal Code,Address,Hotel Tel No,Email Address,Hotel Fax No,Hotel Url,Status
387,ibis budget Singapore West Coast,Sun Yi,118759,418 Pasir Panjang Road,6567779888,contact@gphl.com.sg,,www.ibisbudget-singapore.com,Active
563,Pasir Panjang Inn,Ang Poh Seow,118741,404 Pasir Panjang Road,6567788511,ppinn@pacific.net.sg,,www.ppinn.com.sg/,Active
219,Goodwood Park Hotel,"Khoo Bee Geok, Mavis",228221,22 Scotts Road,6567377411,enquiries@goodwoodparkhotel.com.sg,,www.goodwoodparkhotel.com,Active


In [21]:
df_shopping_malls.sample(3)

Unnamed: 0,mall_names,latitude,longitude,address,postal_code,cleaned_mall_names
160,Queensway Shopping Centre,1.287558,103.801285,"1 Queensway, Singapore 149053",149 053,queensway shopping centre
94,Woodlands Civic Centre,1.434911,103.784597,"900 S Woodlands Dr, Singapore 730900",730 900,woodlands civic centre
32,Orchard Central,1.300635,103.837691,"181 Orchard Rd, Singapore 238896",238 896,orchard central


In [22]:
# clean dataset
df_shopping_malls["postal_code"] = df_shopping_malls["postal_code"].apply(
    lambda x: re.sub("\s+", "", x)
)

df_hotels["cleaned_name"] = df_hotels["Accomodation Name"].str.lower()
df_hotels["postal_code"] = df_hotels["Postal Code"].astype(str)
df_hotels["postal_code"] = df_hotels["postal_code"].apply(
    lambda x: f"{'0' * (6 - len(x))}{x}" if len(x) == 5 else x
)

### Group into the below Categories

- Retail (Shopping Malls, Shophouses)
- Leisure (Tourist Attractions, Entertainment, Fitness areas, Restaurants)
- Communal (Community Centers, Libraries, Security Posts, Residents' Committee, Church or Temples)
- Healthcare (Hospitals/Clinics)
- Education (Schools, Hostels, Special Needs Centres)
- Commercial (Offices, Official Government Services)
- Private Residential
- HDB
- MSCP

In [23]:
def assign_category(postal_code, building_name):
    # ---------------------- Healthcare ----------------------
    list_of_healthcare_keywords = [
        "hospital",
        "hospital/medical centre",
        "polyclinic"
    ]
    for x in list_of_healthcare_keywords:
        if re.search(x, building_name):
            return "Healthcare"

    # ---------------------- Schools ----------------------
    list_of_school_keywords = [
        "school", "college", "skool",
        "ntu", "smu", "nus", "nie", 
        "singapore institute of management", 
        "polytechnic", "uwcsea", "mdis", "bca academy", "tca hostel trinity", 
        "lycee francais de singapour", "st andrew autism centre"
    ]
    for x in list_of_school_keywords:
        if re.search(x, building_name):
            return "Education"

    # ---------------------- Retail ----------------------
    # Mall and Commercial Centers
    lst_of_mall_keywords = [
        "mall",
        "shopping", 
        "shoppers", 
        "shophouse",
        "novena square", 
        "balestier plaza", 
        "orchard point", 
        "joo chiat complex", 
        "bras basah complex", 
        "vivocity", 
        "ue square",
        "shaw house",
        "giant building",
        "customs house",
        "psa nursery hawaii landscape"
    ]
    if postal_code in list(df_shopping_malls["postal_code"]) or building_name in list(df_shopping_malls["cleaned_mall_names"]):
        return "Retail"
    for x in lst_of_mall_keywords:
        if re.search(x, building_name):
            return "Retail"

    # ---------------------- Community ----------------------
    list_of_communal_keywords = [
        "residents\'* committee",
        "library", 
        "kampong",
        "coast guard",
        "\s+c\s*c",
        "columbarium",
        "cemetry",
        "temple",
        "church",
        "bible",
        "community",
        "disabled",
        "post office",
        "circle line"
    ]
    for x in list_of_communal_keywords:
        if re.search(x, building_name):
            return "Communal"

    # ---------------------- Leisure ----------------------
    lst_of_leisure_keywords = [
        "safra club mount faber",
        "chinese swimming complex",
        "home team academy",
        "universal studios singapore resorts world sentosa",
        "hortpark visitor centre",
        "singapore flyer",
        "chinese garden",
        "jurong bird park",
        "satay by the bay",
        "jurong central park - mcdonald",
        "aramsa spa @ bishan park",
        "villa halia",
        "sentosa",
        "marina barrage",
        "tanglin club",
        "museum",
        "marina south pier",
        "concert hall",
        "singapore botanic gardens",
        "country club"
    ]
    for x in lst_of_leisure_keywords:
        if re.search(x, building_name):
            return "Leisure"

    # ---------------------- Hotels ----------------------
    if postal_code in list(df_hotels["postal_code"]):
        return "Hotel"
    
    if building_name in list(df_hotels["cleaned_name"]):
        return "Hotel"

    list_of_hotels = ["hotel", "mandarin oriental", "ritz carlton", "fraser place robertson walk"]
    for x in list_of_hotels:
        if re.search(x, building_name):
            return "Hotel"

    # ---------------------- HDB ----------------------
    if building_name == "hdb residential":
        return "HDB"
    list_of_hdb_keywords = [
        "[a-z]\d",
        "teck ghee vista", 
        "treelodge @ punggol", 
        "53a strathmore avenue singapore 143053", 
        "23a telok blangah crescent singapore 091023",
        "block 840a yishun street 81 singapore 761840",
        "820 thomson road singapore 574623",
        "tiong bahru view"
    ]
    for x in list_of_hdb_keywords:
        if re.search(x, building_name):
            return "HDB"
    
    #MSCP
    if "mscp" in building_name:
        return "MSCP"
    
    # ---------------------- Private Residences ----------------------
    list_of_pte_residences_keywords = [
        "liang seah place",
        "craig place",
        "condominium",
        "no. 11 geylang lorong 35 - level 6",
        "claymore point",
        "winsland house i / lanson place"
    ]
    for x in list_of_pte_residences_keywords:
        if re.search(x, building_name):
            return "Private Residential"

    return "Commercial"

In [24]:
df_skyrise_greenery["TYPE"] = df_skyrise_greenery.apply(
    lambda x: assign_category(x["POSTAL_COD"], x["PROJECT_NA"]), axis=1
)

In [25]:
df_skyrise_greenery["TYPE"].value_counts()

HDB                    103
Commercial              78
Education               61
Retail                  41
Hotel                   38
MSCP                    35
Leisure                 20
Communal                16
Healthcare              11
Private Residential      6
Name: TYPE, dtype: int64

### Export to new Shapefile

In [26]:
df_new_skyrise_greenery = pd.merge(gdf_skyrise_greenery, df_skyrise_greenery, on=["POSTAL_COD", "ADDRESS", "PROJECT_NA"])

In [32]:
df_new_skyrise_greenery["PROJECT_NA"] = df_new_skyrise_greenery["PROJECT_NA"].apply(
    lambda x: x.title() if x != "mscp" else "MSCP"
)

In [35]:
gdf_new_skyrise_greenery = export_df_to_shapefile(df_new_skyrise_greenery, "skyrise_greenery")

In [37]:
gdf_new_skyrise_greenery.sample(5)

Unnamed: 0,X,Y,POSTAL_COD,GARDENID,ADDRESS,PROJECT_NA,INC_CRC,FMEL_UPD_D,geometry,TYPE
145,30873.080963,32327.5869,208581,96,9 King George's Avenue Singapore 208581,People'S Association Headquarters,B801B314FC2187BE,20160317140030,POINT Z (103.85913 1.30863 0.00000),Commercial
242,36461.258599,42277.0877,820180,189,180 Edgefield Plains Singapore 820180,MSCP,1117EDD772B74984,20160317140030,POINT Z (103.90935 1.39861 0.00000),MSCP
378,32989.380946,41797.9602,790430,473,430 Fernvale Link Singapore 790430,Sengkang N4C18A (S) - Fernvale Ridge,4167CA2128B8B2D5,20160317140030,POINT Z (103.87815 1.39428 0.00000),HDB
122,29306.534879,28463.456400000003,79908,73,81 Anson Road Singapore 079908,M Hotel,C215DEA38A4F0131,20160317140030,POINT Z (103.84506 1.27369 0.00000),Hotel
196,24448.348941,34416.416201,269734,428,661 Bukit Timah Road 269734,Hwa Chong Institution (Main School Building),5B7E5847A95B68C8,20160317140030,POINT Z (103.80141 1.32752 0.00000),Education


In [38]:
gdf_new_skyrise_greenery.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich