In [1]:
import requests

In [None]:
# Build a single OneMap search request for one sample address.
from urllib.parse import quote

searchVal = "406 ANG MO KIO AVE 10"
# Percent-encode the search text so spaces and reserved characters (&, #, +)
# cannot corrupt the query string.
url = (
    "https://www.onemap.gov.sg/api/common/elastic/search"
    f"?searchVal={quote(searchVal, safe='')}&returnGeom=Y&getAddrDetails=Y&pageNum=1"
)
# Replace the placeholder with a real access token before running.
headers = {"Authorization": "ENTER_TOKEN_HERE"}

In [4]:
# Send the search request. The timeout bounds the call so a stalled
# connection cannot hang the kernel; requests applies no timeout by default.
response = requests.get(url, headers=headers, timeout=10)
print(response.text)

{
  "found": 1,
  "totalNumPages": 1,
  "pageNum": 1,
  "results": [
    {
      "SEARCHVAL": "406 ANG MO KIO AVENUE 10 SINGAPORE 560406",
      "BLK_NO": "406",
      "ROAD_NAME": "ANG MO KIO AVENUE 10",
      "BUILDING": "NIL",
      "ADDRESS": "406 ANG MO KIO AVENUE 10 SINGAPORE 560406",
      "POSTAL": "560406",
      "X": "30288.2346631354",
      "Y": "38229.0674628187",
      "LATITUDE": "1.36200453938712",
      "LONGITUDE": "103.853879910407"
    }
  ]
}


In [None]:
import json
# Decode the JSON text of the search response into Python dicts and lists.
response_body = response.text
data = json.loads(response_body)

In [15]:
# Display the list of matched address records from the decoded response.
data['results']

[{'SEARCHVAL': '406 ANG MO KIO AVENUE 10 SINGAPORE 560406',
  'BLK_NO': '406',
  'ROAD_NAME': 'ANG MO KIO AVENUE 10',
  'BUILDING': 'NIL',
  'ADDRESS': '406 ANG MO KIO AVENUE 10 SINGAPORE 560406',
  'POSTAL': '560406',
  'X': '30288.2346631354',
  'Y': '38229.0674628187',
  'LATITUDE': '1.36200453938712',
  'LONGITUDE': '103.853879910407'}]

In [19]:
import pandas as pd
import os
import re

FILE = "resale.csv"

# 1. Stop early when the input file is not on disk
file_found = os.path.exists(FILE)
if not file_found:
    raise FileNotFoundError(f"{FILE} does not exist")

# 2. Read the CSV into a DataFrame
df = pd.read_csv(FILE)

print("=== Columns Found ===")
print(df.columns.tolist(), "\n")

# 3. Collect every required column the file lacks
required = ["block", "street_name"]

missing_cols = []
for col in required:
    if col not in df.columns:
        missing_cols.append(col)

if missing_cols:
    raise ValueError(f"Missing required columns: {missing_cols}")

print("Required columns are present.\n")

# Views reused by the checks below
key_columns = df[required]
block_text = df["block"].astype(str)

# 4. Count nulls in each required column
print("=== Missing Values ===")
null_counts = key_columns.isna().sum()
print(null_counts, "\n")

# 5. Preview the first rows of the required columns
print("=== First 10 rows ===")
print(key_columns.head(10), "\n")

# 6. List the first distinct block values (helps spot odd formats)
print("=== Unique Block Formats ===")
print(block_text.unique()[:20], "\n")

# 7. Flag block values that do not fit the expected shape
pattern_block = r"^[0-9A-Za-z\-]+$"  # accepts 10, 10A, 548C, 10-20, etc.

looks_normal = block_text.str.match(pattern_block)
strange_blocks = df[~looks_normal]
if len(strange_blocks) == 0:
    print("All block values look normal.\n")
else:
    print("=== WARNING: Strange Block Values ===")
    print(strange_blocks["block"].unique(), "\n")

# 8. List the first distinct street names
print("=== Sample Street Names ===")
print(df["street_name"].unique()[:20])

=== Columns Found ===
['month', 'town', 'flat_type', 'block', 'street_name', 'storey_range', 'floor_area_sqm', 'flat_model', 'lease_commence_date', 'remaining_lease', 'resale_price'] 

Required columns are present.

=== Missing Values ===
block          0
street_name    0
dtype: int64 

=== First 10 rows ===
  block        street_name
0   406  ANG MO KIO AVE 10
1   108   ANG MO KIO AVE 4
2   602   ANG MO KIO AVE 5
3   465  ANG MO KIO AVE 10
4   601   ANG MO KIO AVE 5
5   150   ANG MO KIO AVE 5
6   447  ANG MO KIO AVE 10
7   218   ANG MO KIO AVE 1
8   447  ANG MO KIO AVE 10
9   571   ANG MO KIO AVE 3 

=== Unique Block Formats ===
['406' '108' '602' '465' '601' '150' '447' '218' '571' '534' '233' '235'
 '219' '536' '230' '570' '624' '441' '625' '119'] 

All block values look normal.

=== Sample Street Names ===
['ANG MO KIO AVE 10' 'ANG MO KIO AVE 4' 'ANG MO KIO AVE 5'
 'ANG MO KIO AVE 1' 'ANG MO KIO AVE 3' 'ANG MO KIO AVE 9'
 'ANG MO KIO AVE 8' 'ANG MO KIO AVE 6' 'ANG MO KIO ST 52'
 'B

In [None]:
import requests
import pandas as pd
import time
import json
import os

# =========================
# CONFIG
# =========================
# Files read and written by the geocoding run
INPUT_FILE = "resale.csv"                      # resale records to geocode
CACHE_FILE = "geocode_cache_full.json"         # address -> results, kept between runs
OUTPUT_FILE = "resale_with_geocode.json.csv"   # input rows plus geocode results

# Request pacing
MAX_RETRIES = 5          # attempts per address before giving up
RATE_LIMIT_SLEEP = 0.3   # pause between address lookups, in seconds (~200/min)


# =========================
# LOAD CACHE
# =========================
# Start from the cache saved by a previous run when one exists on disk;
# otherwise begin with an empty mapping.
cache_on_disk = os.path.exists(CACHE_FILE)
if not cache_on_disk:
    cache = {}
else:
    with open(CACHE_FILE, "r") as cache_fh:
        cache = json.load(cache_fh)



def geocode_address(block, street):
    """Return the FULL results list from OneMap for a given HDB block & street.

    Completed lookups (including ones that matched nothing) are stored in the
    module-level ``cache`` under the key ``"<block> <street>"`` and served from
    there on later calls. Lookups that fail on every attempt are NOT cached,
    so a transient outage does not become a permanent empty result once the
    cache is written to disk.

    Args:
        block: Block number, e.g. ``"406"``.
        street: Street name, e.g. ``"ANG MO KIO AVE 10"``.

    Returns:
        The ``results`` list from the response (possibly empty), or an empty
        list when all ``MAX_RETRIES`` attempts failed.
    """
    from urllib.parse import quote  # local import keeps this cell self-contained

    addr = f"{block} {street}"

    # Serve from cache before doing any request set-up
    if addr in cache:
        return cache[addr]

    # Percent-encode the search text so reserved characters in an address
    # (&, #, +, %) cannot alter the query string.
    url = (
        "https://www.onemap.gov.sg/api/common/elastic/search"
        f"?searchVal={quote(addr, safe='')}&returnGeom=Y&getAddrDetails=Y&pageNum=1"
    )
    # Replace the placeholder with a real access token before running.
    headers = {"Authorization": "ENTER_TOKEN_HERE"}

    last_error = None

    for attempt in range(MAX_RETRIES):
        try:
            r = requests.get(url, headers=headers, timeout=10)

            if r.status_code != 429:
                r.raise_for_status()

                # Store the entire list of results
                results = r.json().get("results", [])
                cache[addr] = results
                return results

            last_error = "HTTP 429 (rate limited)"
        except Exception as exc:
            # Deliberately broad: one bad address must not abort a long batch.
            # The error is kept so a total failure can be reported below.
            last_error = exc

        # Exponential backoff between attempts; no wait after the final one.
        if attempt < MAX_RETRIES - 1:
            time.sleep(2 ** attempt)

    # On total failure: report it and return an empty list without caching,
    # so the address is retried on the next run.
    print(f"Geocoding failed for {addr!r} after {MAX_RETRIES} attempts: {last_error}")
    return []


# =========================
# MAIN PROCESS
# =========================
print("Loading resale.csv...")
df = pd.read_csv(INPUT_FILE)

# Search text used for each row: "<block> <street_name>"
df["full_address"] = df["block"].astype(str) + " " + df["street_name"]

# Geocode each distinct address once rather than once per row
unique_addresses = df["full_address"].unique()
print(f"Unique addresses to geocode: {len(unique_addresses)}")


# Geocode unique addresses
addr_to_results = {}

try:
    for addr in unique_addresses:
        # Checked before the call, because a completed lookup adds addr to cache.
        needs_request = addr not in cache

        block, street = addr.split(" ", 1)
        results = geocode_address(block, street)
        addr_to_results[addr] = results

        # Pace only real network lookups; cache hits need no rate limiting,
        # so a fully cached re-run finishes immediately.
        if needs_request:
            time.sleep(RATE_LIMIT_SLEEP)
finally:
    # Save updated cache even when the loop is interrupted or raises, so the
    # lookups already made are not lost.
    with open(CACHE_FILE, "w") as f:
        json.dump(cache, f, indent=2)


# Add results to dataframe (as JSON string)
df["geocode_results"] = df["full_address"].map(
    lambda addr: json.dumps(addr_to_results[addr])
)

df.to_csv(OUTPUT_FILE, index=False)
print(f"Done! Saved to {OUTPUT_FILE}")

Loading resale.csv...
Unique addresses to geocode: 9663
Done! Saved to resale_with_geocode.json.csv
