In [8]:
import requests
import os
from dotenv import load_dotenv
import time
import pandas as pd

In [5]:
# Load environment variables. The OneMap credentials (ONEMAP_EMAIL and
# ONEMAP_PASSWORD) are read from the environment with os.getenv when the
# access token is requested, so this must run before that call.
# NOTE(review): presumably the values live in a local .env file — confirm it
# exists and is excluded from version control.
load_dotenv()

True

In [6]:
# Authentication
def get_onemap_token(timeout=30):
    """Request an access token from the OneMap authentication endpoint.

    The credentials are taken from the ``ONEMAP_EMAIL`` and
    ``ONEMAP_PASSWORD`` environment variables.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The ``access_token`` string from the JSON response.

    Raises:
        RuntimeError: If a credential is missing from the environment or the
            response does not contain an ``access_token``.
        requests.HTTPError: If the endpoint answers with an error status.
    """
    url = "https://www.onemap.gov.sg/api/auth/post/getToken"
    email = os.getenv('ONEMAP_EMAIL')
    password = os.getenv('ONEMAP_PASSWORD')
    # Fail early with a clear message instead of posting null credentials and
    # handing a None token to every later request.
    if not email or not password:
        raise RuntimeError(
            "ONEMAP_EMAIL and ONEMAP_PASSWORD must be set in the environment"
        )
    payload = {
        "email": email,
        "password": password
    }
    response = requests.post(url, json=payload, timeout=timeout)
    response.raise_for_status()
    token = response.json().get('access_token')
    if not token:
        raise RuntimeError("OneMap response did not contain an access_token")
    return token

# Geocoding function
def geocode_address(address, token, timeout=30):
    """Look up an address with the OneMap search endpoint.

    Args:
        address: Free-text address sent as the ``searchVal`` query parameter.
        token: OneMap access token. When falsy, no Authorization header is
            sent rather than the meaningless literal ``Bearer None``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response (first page of results, with
        geometry and address details requested).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status, so
            that a rate-limit or server error is not mistaken for
            "no results".
    """
    url = "https://www.onemap.gov.sg/api/common/elastic/search"
    params = {
        "searchVal": address,
        "returnGeom": "Y",
        "getAddrDetails": "Y",
        "pageNum": 1
    }
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response.json()

In [7]:
original_file = '../3. GeocodingPrep/3.1.cleaned_address_out.csv'
output_file = '4.2.updated_file_with_geolocation.csv'
df = pd.read_csv(original_file)

# Initialize lat_long column if it doesn't exist. Force object dtype so a
# (lat, lon) tuple can be stored in a cell even when the column was read from
# CSV as all-NaN floats.
if 'lat_long' not in df.columns:
    df['lat_long'] = None
df['lat_long'] = df['lat_long'].astype(object)

# Resume from an earlier partial run. The loop below only queries rows whose
# lat_long is empty, but that is pointless unless previously saved results are
# loaded back in. Prior results are reused only when the saved file lists
# exactly the same addresses, so a stale output never leaks into a new input.
# Values restored from CSV are strings such as "(1.27, 103.80)"; they are
# written back out unchanged.
if os.path.exists(output_file):
    previous = pd.read_csv(output_file)
    same_rows = (
        {'cleaned_address', 'lat_long'} <= set(previous.columns)
        and previous['cleaned_address'].equals(df['cleaned_address'])
    )
    if same_rows:
        df['lat_long'] = (
            df['lat_long']
            .where(df['lat_long'].notna(), previous['lat_long'])
            .astype(object)
        )
        print(f"Resuming: {int(df['lat_long'].notna().sum())} of {len(df)} rows already geocoded")

# Process addresses in batches of 1000
token = get_onemap_token()
batch_size = 1000

for start_idx in range(0, len(df), batch_size):
    end_idx = min(start_idx + batch_size, len(df))
    batch = df.iloc[start_idx:end_idx]

    # Only process rows where lat_long is empty
    mask = batch['lat_long'].isna()
    batch_to_process = batch[mask]

    if not batch_to_process.empty:
        print(f"\nProcessing batch {start_idx//batch_size + 1}, rows {start_idx} to {end_idx}")

        try:
            for address_idx, address in batch_to_process['cleaned_address'].items():
                # A missing address would otherwise be sent as the text "nan".
                if pd.isna(address) or not str(address).strip():
                    print(f"Skipping row {address_idx}: empty address")
                    continue

                try:
                    result = geocode_address(address, token)
                    matches = result.get('results')
                    if matches:
                        location = matches[0]
                        lat = float(location['LATITUDE'])
                        lon = float(location['LONGITUDE'])
                        df.at[address_idx, 'lat_long'] = (lat, lon)
                        print(f"Processed: {address} -> {(lat, lon)}")
                    else:
                        print(f"No results found for: {address}")
                except (requests.RequestException, KeyError, ValueError) as exc:
                    # One failed lookup must not abort a run that takes about
                    # a second per address; the row stays empty and is
                    # retried on the next run.
                    print(f"Request failed for: {address} ({exc})")

                time.sleep(1)  # Rate limiting delay
        finally:
            # Save progress after each batch, and also when the batch is
            # interrupted, so completed lookups are never lost.
            df.to_csv(output_file, index=False)
            print(f"Saved progress after batch {start_idx//batch_size + 1}")

print("Geocoding completed")

Address: 2 Alexandra Rd, Delta House, Singapore 159919
Coordinates: 1.27425442821763, 103.803711567804

Address: 1 Joo Koon Cir, FairPrice Joo Koon, Singapore 629117
Coordinates: 1.32476879097421, 103.674484690433

Address: 1 Yishun Industrial Street 1, A'Posh BizHub, Singapore 768160
Coordinates: 1.43732110123747, 103.842085763701

Address: 5 Engineering Drive 1, Block E8, Singapore 117608
Coordinates: 1.29877546585606, 103.771795736896

