HDB coordinates

In [20]:
import pandas as pd
from shapely.geometry import shape
import json
import re

# Build a table of HDB building centroids (latitude, longitude, postal code)
# from the building-footprint GeoJSON and persist it as a CSV.

# JSON text is UTF-8 by specification, so set the encoding explicitly instead
# of relying on the platform default (which is not UTF-8 on every OS).
file_path = '../data/HDB/HDBExistingBuilding.geojson'
with open(file_path, 'r', encoding='utf-8') as f:
    geojson_data = json.load(f)
centroids_data = []

# The postal code is matched inside the HTML table stored in each feature's
# Description property (the POSTAL_COD cell followed by a 6-digit value).
postal_code_pattern = r'POSTAL_COD<\/th> <td>(\d{6})<\/td>'

for feature in geojson_data['features']:
    # Use Shapely to construct the shape from the geometry and calculate the centroid
    polygon = shape(feature['geometry'])
    centroid = polygon.centroid

    # A feature without a Description is handled like one whose description
    # has no postal code: the row is kept and PostalCode is None.
    properties = feature.get('properties') or {}
    description = properties.get('Description') or ''
    match = re.search(postal_code_pattern, description)
    postal_code = match.group(1) if match else None

    # Centroid x is the longitude and y the latitude
    centroids_data.append({
        'Latitude': centroid.y,
        'Longitude': centroid.x,
        'PostalCode': postal_code
    })

# Convert the list of centroid data into a pandas DataFrame
hdb_coordinates = pd.DataFrame(centroids_data)

# Save the pandas DataFrame to a CSV file
hdb_coordinates.to_csv('../data/HDB/hdb_coordinates.csv', index=False)

# Preview the result; only the last expression of a cell is displayed
hdb_coordinates.head()

Private property coordinates

In [21]:
# Build a table of private-property point coordinates (latitude, longitude,
# postal code) from the dwelling-units GeoJSON and persist it as a CSV.
file_path = '../data/HDB/URANoofDwellingUnitscurrentyear.geojson'

# JSON text is UTF-8 by specification, so set the encoding explicitly instead
# of relying on the platform default (which is not UTF-8 on every OS).
with open(file_path, 'r', encoding='utf-8') as f:
    geojson_data = json.load(f)

# Initialize an empty list to hold the data
data = []

# Regex pattern to match postal codes within the description
# (note: this dataset labels the cell POSTALCODE)
postal_code_pattern = r'POSTALCODE<\/th> <td>(\d{6})<\/td>'

# Loop through each feature in the GeoJSON file
for feature in geojson_data['features']:
    # The coordinates are read directly as a position: element 0 is the
    # longitude, element 1 the latitude.
    coordinates = feature['geometry']['coordinates']
    longitude = coordinates[0]
    latitude = coordinates[1]

    # A feature without a Description is handled like one whose description
    # has no postal code: the row is kept and PostalCode is None.
    properties = feature.get('properties') or {}
    description = properties.get('Description') or ''
    match = re.search(postal_code_pattern, description)
    postal_code = match.group(1) if match else None

    # Append the coordinates and postal code to the list
    data.append({
        'Latitude': latitude,
        'Longitude': longitude,
        'PostalCode': postal_code
    })

# Convert the list of data into a pandas DataFrame
private_coordinates = pd.DataFrame(data)

# Save the DataFrame to a CSV file
private_coordinates.to_csv('../data/HDB/private_coordinates.csv', index=False)

Optional: reverse-geocode the coordinates to get addresses (very slow)

In [1]:
import requests
import time
import pandas as pd
from shapely.geometry import shape
import json

from dotenv import load_dotenv
import os
import requests

# Reverse-geocode every row of `df_centroids` through the OneMap API and store
# the returned building name in a new 'Address' column. One request is made
# per row with a one-second pause in between, so this cell is very slow.

load_dotenv()
payload = {
    "email": os.getenv("ONE_MAP_EMAIL"),
    "password": os.getenv("ONE_MAP_PASSWORD")
}
# Fail with a clear message instead of an opaque KeyError on the token below
if not all(payload.values()):
    raise RuntimeError("ONE_MAP_EMAIL and ONE_MAP_PASSWORD must be set in the environment or a .env file")

# Without a timeout a stalled connection would block the cell indefinitely
REQUEST_TIMEOUT = 30  # seconds

token_response = requests.post(
    "https://www.onemap.gov.sg/api/auth/post/getToken",
    json=payload,
    timeout=REQUEST_TIMEOUT,
)
# Surface authentication failures as an HTTP error at the point they happen
token_response.raise_for_status()
api_key = token_response.json()["access_token"]

base_url = "https://www.onemap.gov.sg/api/public/revgeocode"

# Headers for the API request
headers = {
    "Authorization": f"Bearer {api_key}"
}

# `df_centroids` is not created anywhere in the visible notebook. If the
# kernel does not already hold it, fall back to the HDB centroid CSV.
# NOTE(review): assumes the HDB centroids are the intended input - confirm.
if 'df_centroids' not in globals():
    df_centroids = pd.read_csv('../data/HDB/hdb_coordinates.csv')

# Add a new column for addresses
df_centroids['Address'] = None
total_rows = len(df_centroids)

# Loop over the DataFrame rows
for position, (index, row) in enumerate(df_centroids.iterrows(), start=1):
    # Construct the API URL for reverse geocoding
    url = f"{base_url}?location={row['Latitude']},{row['Longitude']}&addressType=All&otherFeatures=N"

    try:
        # Make the API request
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)

        # Check the response status
        if response.status_code == 200:
            # Parse the response JSON and get the address information
            data = response.json()
            geocode_info = data.get('GeocodeInfo')
            if geocode_info:  # Check if there is geocode information in the response
                # Use the first result; a missing BUILDINGNAME leaves the address as None
                df_centroids.at[index, 'Address'] = geocode_info[0].get('BUILDINGNAME')
            print(f"Reverse geocoded {row['Latitude']},{row['Longitude']} ({position} out of {total_rows})")
        else:
            print(f"Failed to reverse geocode. Status code: {response.status_code}, Response content: {response.text}")
    except requests.RequestException as exc:
        # A single network error should not abort the whole run; the row
        # keeps Address = None and the loop moves on.
        print(f"Failed to reverse geocode {row['Latitude']},{row['Longitude']}: {exc}")

    # Respect the API rate limits; add a slight delay between requests
    time.sleep(1)  # Adjust the delay as per the API provider's rate limit guidelines

# Save the DataFrame with the addresses to a CSV file

#df_centroids.to_csv('../data/HDB/hdb_blocks_with_addresses.csv', index=False)


KeyboardInterrupt: 

In [3]:
import pandas as pd
from sklearn.cluster import KMeans
import numpy as np
# Cluster the HDB and private-property coordinates separately with K-means
# and write each set of cluster centres to a header-less CSV.
hdb_coordinates = pd.read_csv('../data/HDB/hdb_coordinates.csv')
private_coordinates = pd.read_csv('../data/HDB/private_coordinates.csv')

# Column order is (latitude, longitude); the saved centres keep that order
hdb_coords = hdb_coordinates[['Latitude', 'Longitude']].to_numpy()
priv_coords = private_coordinates[['Latitude', 'Longitude']].to_numpy()

# NOTE(review): presumably 10 clusters for each of 55 areas - confirm the
# meaning of 55. Each dataset must contain at least this many points.
n_clusters = 55*10

# Perform K-means clustering.
# n_init is pinned to 10 (the default in effect when this notebook was run)
# so results do not change when scikit-learn switches its default, and so the
# FutureWarning about the changing default is no longer emitted.
hdb_kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(hdb_coords)
priv_kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(priv_coords)
hdb_cluster_centroids = hdb_kmeans.cluster_centers_
priv_cluster_centroids = priv_kmeans.cluster_centers_

# Save the centroids to CSV files (one "latitude,longitude" row per cluster)
np.savetxt('../data/Cluster_data/hdb_cluster_centroids.csv', hdb_cluster_centroids, delimiter=',')
np.savetxt('../data/Cluster_data/priv_cluster_centroids.csv', priv_cluster_centroids, delimiter=',')

import folium

# Base map roughly centred on Singapore; adjust the centre or zoom to suit the data.
sg_map = folium.Map(location=[1.3521, 103.8198], zoom_start=12)

# Draw one circle per cluster centre: the HDB centres first in blue, then the
# private-property centres in red. Each centre is a (latitude, longitude) pair.
for centroids, marker_color in (
    (hdb_cluster_centroids, 'blue'),
    (priv_cluster_centroids, 'red'),
):
    for coord in centroids:
        marker = folium.CircleMarker(
            location=[coord[0], coord[1]],
            radius=5,
            color=marker_color,
            fill=True,
            fill_color=marker_color,
            fill_opacity=0.6
        )
        marker.add_to(sg_map)

# Write the map to a standalone HTML file (it is not displayed inline)
sg_map.save('../data/Cluster_data/cluster_centroids_map.html')

  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
