In [None]:
import pandas as pd
import numpy as np

import requests
import time
from typing import Dict, List, Optional
from shaply.geometry import Point, Polygon, shape

import geopandas as gpd
from bs4 import BeautifulSoup

# get lat, lon

In [None]:

def lookup_postcode_details(postcode: str) -> Dict:

    # OneMap Search API endpoint
    api_url = "https://www.onemap.gov.sg/api/common/elastic/search"

    # API
    params = {
        'searchVal': postcode,
        'returnGeom': 'Y',
        'getAddrDetails': 'Y',
        'pageNum': 1
    }
    headers = {"Authorization": "Bearer **********************"}

    try:
        # request
        response = requests.get(api_url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        
        data = response.json()
        
        if data['found'] > 0:
            result = data['results'][0]
            
            return {
                'latitude': float(result.get('LATITUDE', 0)),
                'longitude': float(result.get('LONGITUDE', 0)),
            }
        else:
            return {
                'postcode': postcode,
                'status': 'not_found'
            }
            
    except requests.exceptions.RequestException as e:
        return {
            'postcode': postcode,
            'status': 'error',
            'error': str(e)
        }



def batch_lookup_postcodes(postcodes: List[str], token: Optional[str] = None, delay: float = 0.1) -> pd.DataFrame:
    
    results = []
    total = len(postcodes)
    
    for i, postcode in enumerate(postcodes):
        # clean
        clean_postcode = str(postcode).strip().zfill(6)
        
        # query
        result = lookup_postcode_details(clean_postcode)
        results.append(result)
        
        # progress bar
        if (i + 1) % 10 == 0 or (i + 1) == total:
            print(f"Done: {i + 1}/{total}")
        
        # delay 
        if delay > 0 and i < total - 1:
            time.sleep(delay)
    
    df = pd.DataFrame(results)

    return df

In [None]:
# save result
df_childcare = pd.read_csv('./data/3_ListingofCentres.csv')

# get unique post codes
df_childcare = df_childcare[df_childcare['postal_code'] > 0]
ls_postal = df_childcare['postal_code'].astype(int).unique().tolist()  #1813 unique value

# run batch lookup
df = batch_lookup_postcodes(ls_postal)

In [None]:
# save result
df['postcode'] = ls_postal
df.to_csv('./data_processed/postcode_ll.csv', index=False)

# map to subzone

In [None]:
# Load the subzone boundaries shapefile
subzone_gdf = gpd.read_file('./data/MasterPlan2019SubzoneBoundaryNoSea/Master Plan 2019 Subzone Boundary (No Sea) (GEOJSON).geojson')


In [None]:
def getName(html):
    soup = BeautifulSoup(html, 'html.parser')

    #  <th>SUBZONE_N</th> follow <td>。
    th = soup.find('th', string='SUBZONE_N')
    if th:
        td = th.find_next_sibling('td')
        subzone_name = td.get_text(strip=True) if td else None
    else:
        subzone_name = None

    print("Subzone:", subzone_name)
    return subzone_name

def getSubzone(x):
    if np.isnan(x.longitude):
        return ''
        
    point = Point(x.longitude, x.latitude)  
    
    mask = subzone_gdf.geometry.contains(point)
    matching_rows = subzone_gdf[mask]
    
    html = matching_rows['Description'].values[0]
    return getName(html)

In [45]:
df['Subzone'] = df.apply(lambda x: getSubzone(x), axis=1)

Subzone: ULU PANDAN
Subzone: GOMBAK
Subzone: HONG KAH NORTH
Subzone: TOH TUCK
Subzone: UPPER PAYA LEBAR
Subzone: ROBERTSON QUAY
Subzone: BUKIT BATOK CENTRAL
Subzone: SIMEI
Subzone: BIDADARI
Subzone: HONG KAH
Subzone: UPPER THOMSON
Subzone: BOON LAY PLACE
Subzone: SELETAR HILLS
Subzone: XILIN
Subzone: FERNVALE
Subzone: LORONG AH SOO
Subzone: FERNVALE
Subzone: KATONG
Subzone: HOUGANG CENTRAL
Subzone: HILLVIEW
Subzone: GEYLANG EAST
Subzone: WOODLANDS EAST
Subzone: WENYA
Subzone: YIO CHU KANG EAST
Subzone: KOVAN
Subzone: WENYA
Subzone: GARDEN
Subzone: BEDOK SOUTH
Subzone: PLANTATION
Subzone: MATILDA
Subzone: CLEMENTI NORTH
Subzone: HONG KAH
Subzone: TIONG BAHRU STATION
Subzone: MACPHERSON
Subzone: HONG KAH
Subzone: TEBAN GARDENS
Subzone: BUKIT BATOK CENTRAL
Subzone: HILLVIEW
Subzone: FERNVALE
Subzone: TAMPINES EAST
Subzone: TAMPINES EAST
Subzone: TRAFALGAR
Subzone: CECIL
Subzone: SEMBAWANG NORTH
Subzone: SEMBAWANG NORTH
Subzone: TOH TUCK
Subzone: YISHUN SOUTH
Subzone: KEMBANGAN
Subzone: KA

In [None]:
# save result
df.to_csv('./data_processed/postcode_ll_subzone.csv', index=False)