In [1]:
import pandas as pd
import json
import math

In [3]:
# Convert the hawker-centre GeoJSON export into a flat CSV of names and coordinates.
# JSON text is UTF-8, so the encoding is passed explicitly instead of relying on
# the platform default (which is not UTF-8 on every system).
with open('../datasets/HawkerCentresGEOJSON.geojson', encoding='utf-8') as geojson_file:
    data = json.load(geojson_file)

# One record (dict) per GeoJSON feature
hawker_data = []

for feature in data['features']:
    properties = feature['properties']
    # Index 0 is taken as the longitude and index 1 as the latitude
    coordinates = feature['geometry']['coordinates']

    hawker_data.append({
        'Hawker Name': properties['NAME'],
        'Street Name': properties['ADDRESSSTREETNAME'],
        'Longitude': coordinates[0],
        'Latitude': coordinates[1],
    })

# Build the table in one go and write it out without the index column
df = pd.DataFrame(hawker_data)
df.to_csv('../datasets/hawker_centres.csv', index=False)

In [2]:
def calculate_distance(lat1, lon1, lat2, lon2):
    """
    Great-circle distance between two points, in whole meters.

    The haversine formula is evaluated on a sphere of radius 6,371,000 m.

    Parameters:
    lat1, lon1: Latitude and longitude of the first point, in degrees
                (anything accepted by float(), e.g. numbers or numeric strings)
    lat2, lon2: Latitude and longitude of the second point, in degrees

    Returns:
    The distance in meters, rounded to the nearest integer
    """
    earth_radius_m = 6371000

    # Degrees -> radians for all four inputs, converted in argument order
    phi1, lam1, phi2, lam2 = (
        math.radians(float(value)) for value in (lat1, lon1, lat2, lon2)
    )

    half_dphi = (phi2 - phi1) / 2
    half_dlam = (lam2 - lam1) / 2

    # Haversine: a is the squared half-chord length, c the central angle
    lat_term = math.sin(half_dphi)**2
    lon_term = math.cos(phi1) * math.cos(phi2) * math.sin(half_dlam)**2
    a = lat_term + lon_term
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return round(earth_radius_m * c)

In [10]:
def find_nearby_hawkers(lat, lon, hawkers_df, max_distance=1000):
    """
    List the hawker centers lying within max_distance meters of a point.

    Parameters:
    lat, lon: Coordinates of the property
    hawkers_df: DataFrame with 'Hawker Name', 'Latitude' and 'Longitude' columns
    max_distance: Inclusive radius in meters (default 1000)

    Returns:
    The matching names joined by '; ' in row order, or None when nothing matches
    """
    names_in_range = [
        row['Hawker Name']
        for _, row in hawkers_df.iterrows()
        if calculate_distance(lat, lon, row['Latitude'], row['Longitude']) <= max_distance
    ]

    if not names_in_range:
        return None
    return '; '.join(names_in_range)

In [None]:
# Hawker centre coordinates used for the proximity lookup
hawkers_df = pd.read_csv('../datasets/supplementary_datasets/hawker_centers_coordinates.csv')

# Annotate the per-district files for districts 2 through 28
for district_num in range(2, 29):
    try:
        district_csv = f'../datasets/updated_coordinates/district{district_num}.csv'

        # Load the district and discard rows lacking either coordinate.
        # NOTE: the filtered frame is written back to the same path below, so
        # those rows are also removed from the file on disk.
        df = pd.read_csv(district_csv).dropna(subset=['Latitude', 'Longitude'])

        # Make sure the result column exists before filling it in
        hawker_column = 'Nearby Hawker Centers'
        if hawker_column not in df.columns:
            df[hawker_column] = None

        # One lookup per project; the result is shared by all of its rows
        for project_name in df['Project Name'].unique():
            try:
                same_project = df['Project Name'] == project_name

                # The first row of the project supplies the coordinates
                project_row = df[same_project].iloc[0]

                print(f"Processing: {project_name}")

                nearby_hawkers = find_nearby_hawkers(
                    project_row['Latitude'], project_row['Longitude'], hawkers_df
                )

                df.loc[same_project, hawker_column] = nearby_hawkers

            except Exception as e:
                # Best effort: report the failing project and move on to the next one
                print(f"Error processing {project_name} in district {district_num}: {str(e)}")

        # Overwrite the district file with the annotated rows
        df.to_csv(district_csv, index=False)
        print(f"Completed district {district_num}")

    except Exception as e:
        # Best effort: a district that cannot be read or written is reported and skipped
        print(f"Error processing district {district_num}: {str(e)}")

In [7]:
def find_nearby_malls(lat, lon, malls_df, max_distance=1000):
    """
    List the shopping malls lying within max_distance meters of a point.

    Parameters:
    lat, lon: Coordinates of the property
    malls_df: DataFrame with 'Mall Name', 'Latitude' and 'Longitude' columns
    max_distance: Inclusive radius in meters (default 1000)

    Returns:
    The matching names joined by '; ' in row order, or None when nothing matches
    """
    names_in_range = [
        row['Mall Name']
        for _, row in malls_df.iterrows()
        if calculate_distance(lat, lon, row['Latitude'], row['Longitude']) <= max_distance
    ]

    if not names_in_range:
        return None
    return '; '.join(names_in_range)

In [None]:
# Shopping mall coordinates used for the proximity lookup
malls_df = pd.read_csv('../datasets/supplementary_datasets/shopping_malls_coordinates.csv')

# Annotate the per-district files for districts 2 through 28
for district_num in range(2, 29):
    try:
        district_csv = f'../datasets/updated_coordinates/district{district_num}.csv'

        # Load the district and discard rows lacking either coordinate.
        # NOTE: the filtered frame is written back to the same path below, so
        # those rows are also removed from the file on disk.
        df = pd.read_csv(district_csv).dropna(subset=['Latitude', 'Longitude'])

        # Make sure the result column exists before filling it in
        mall_column = 'Shopping Malls Within Radius of 1km'
        if mall_column not in df.columns:
            df[mall_column] = None

        # One lookup per project; the result is shared by all of its rows
        for project_name in df['Project Name'].unique():
            try:
                same_project = df['Project Name'] == project_name

                # The first row of the project supplies the coordinates
                project_row = df[same_project].iloc[0]

                print(f"Processing: {project_name}")

                # Shopping malls within the function's default radius
                nearby_malls = find_nearby_malls(
                    project_row['Latitude'], project_row['Longitude'], malls_df
                )

                df.loc[same_project, mall_column] = nearby_malls

            except Exception as e:
                # Best effort: report the failing project and move on to the next one
                print(f"Error processing {project_name} in district {district_num}: {str(e)}")

        # Overwrite the district file with the annotated rows
        df.to_csv(district_csv, index=False)
        print(f"Completed district {district_num}")

    except Exception as e:
        # Best effort: a district that cannot be read or written is reported and skipped
        print(f"Error processing district {district_num}: {str(e)}")

## Hospitals

In [13]:
def find_nearby_hospitals(lat, lon, hospitals_df, max_distance=5000):
    """
    List the hospitals lying within max_distance meters of a point.

    Parameters:
    lat, lon: Coordinates of the property
    hospitals_df: DataFrame with 'Hospital Name', 'Latitude' and 'Longitude' columns
    max_distance: Inclusive radius in meters (default 5000)

    Returns:
    The matching names joined by '; ' in row order, or None when nothing matches
    """
    names_in_range = [
        row['Hospital Name']
        for _, row in hospitals_df.iterrows()
        if calculate_distance(lat, lon, row['Latitude'], row['Longitude']) <= max_distance
    ]

    if not names_in_range:
        return None
    return '; '.join(names_in_range)

In [None]:
# Hospital coordinates used for the proximity lookup
hospitals_df = pd.read_csv('../datasets/supplementary_datasets/hospitals.csv')

# Annotate the per-district files for districts 2 through 28
for district_num in range(2, 29):
    try:
        district_csv = f'../datasets/updated_coordinates/district{district_num}.csv'

        # Load the district and discard rows lacking either coordinate.
        # NOTE: the filtered frame is written back to the same path below, so
        # those rows are also removed from the file on disk.
        df = pd.read_csv(district_csv).dropna(subset=['Latitude', 'Longitude'])

        # Make sure the hospitals column exists before filling it in
        hospital_column = 'Hospitals Within Radius of 5km'
        if hospital_column not in df.columns:
            df[hospital_column] = None

        # One lookup per project; the result is shared by all of its rows
        for project_name in df['Project Name'].unique():
            try:
                same_project = df['Project Name'] == project_name

                # The first row of the project supplies the coordinates
                project_row = df[same_project].iloc[0]

                print(f"Processing: {project_name}")

                # Hospitals within the function's default radius
                nearby_hospitals = find_nearby_hospitals(
                    project_row['Latitude'], project_row['Longitude'], hospitals_df
                )

                df.loc[same_project, hospital_column] = nearby_hospitals

            except Exception as e:
                # Best effort: report the failing project and move on to the next one
                print(f"Error processing {project_name} in district {district_num}: {str(e)}")

        # Overwrite the district file with the annotated rows
        df.to_csv(district_csv, index=False)
        print(f"Completed district {district_num}")

    except Exception as e:
        # Best effort: a district that cannot be read or written is reported and skipped
        print(f"Error processing district {district_num}: {str(e)}")

## Schools

In [20]:
def find_nearby_schools(lat, lon, schools_df, max_distance=2000):
    """
    List the schools lying within max_distance meters of a point.

    Parameters:
    lat, lon: Coordinates of the property
    schools_df: DataFrame with 'School Name', 'Latitude' and 'Longitude' columns
    max_distance: Inclusive radius in meters (default 2000)

    Returns:
    The matching names joined by '; ' in row order, or None when nothing matches
    """
    names_in_range = [
        row['School Name']
        for _, row in schools_df.iterrows()
        if calculate_distance(lat, lon, row['Latitude'], row['Longitude']) <= max_distance
    ]

    if not names_in_range:
        return None
    return '; '.join(names_in_range)

In [28]:
# School coordinates used for the proximity lookup
schools_df = pd.read_csv('../datasets/supplementary_datasets/schools.csv')

# Single source of truth for the output column. The lookup below uses
# find_nearby_schools' default radius of 2000 m, so the label says 2km.
# Previously the column was created as '...2km' but the results were written
# to '...5km', which left the 2km column empty and added a mislabelled column.
# NOTE(review): files processed by the earlier version may still carry a stale
# 'Schools Within Radius of 5km' column; it is left untouched here.
SCHOOLS_COLUMN = 'Schools Within Radius of 2km'

# Annotate the per-district files for districts 2 through 28
for district_num in range(2, 29):
    try:
        # Read the updated dataset
        df = pd.read_csv(f'../datasets/updated_coordinates/district{district_num}.csv')

        # Filter out rows without coordinates.
        # NOTE: the filtered frame is written back to the same path below, so
        # those rows are also removed from the file on disk.
        df = df.dropna(subset=['Latitude', 'Longitude'])

        # Create the schools column if it doesn't exist
        if SCHOOLS_COLUMN not in df.columns:
            df[SCHOOLS_COLUMN] = None

        # Process each unique project; the result is shared by all of its rows
        for project_name in df['Project Name'].unique():
            try:
                project_rows = df['Project Name'] == project_name

                # The first row of the project supplies the coordinates
                project_row = df[project_rows].iloc[0]

                print(f"Processing: {project_name}")

                # Get nearby schools
                nearby_schools = find_nearby_schools(
                    project_row['Latitude'],
                    project_row['Longitude'],
                    schools_df
                )

                # Write to the same column that was created/checked above
                df.loc[project_rows, SCHOOLS_COLUMN] = nearby_schools

            except Exception as e:
                # Best effort: report the failing project and move on to the next one
                print(f"Error processing {project_name} in district {district_num}: {str(e)}")
                continue

        # Save the updated dataset
        df.to_csv(f'../datasets/updated_coordinates/district{district_num}.csv', index=False)
        print(f"Completed district {district_num}")

    except Exception as e:
        # Best effort: a district that cannot be read or written is reported and skipped
        print(f"Error processing district {district_num}: {str(e)}")
        continue

Processing: SKY EVERTON
Processing: SKYSUITES@ANSON
Processing: EON SHENTON
Processing: THE ARRIS
Processing: SPOTTISWOODE RESIDENCES
Processing: WALLICH RESIDENCE
Processing: 76 SHENTON
Processing: SPOTTISWOODE 18
Processing: CRAIG PLACE
Processing: INTERNATIONAL PLAZA
Processing: SPOTTISWOODE SUITES
Processing: SPOTTISWOODE PARK
Processing: LUMIERE
Processing: ICON
Processing: THE BEACON
Processing: ONE BERNAM
Processing: ALTEZ
Processing: DORSETT RESIDENCES
Processing: ONZE @ TANJONG PAGAR
Processing: TMW MAXWELL
Completed district 2
Processing: THE METROPOLITAN CONDOMINIUM
Processing: AVENUE SOUTH RESIDENCE
Processing: THE ANCHORAGE
Processing: ONE PEARL BANK
Processing: REGENCY SUITES
Processing: COMMONWEALTH TOWERS
Processing: RIVER PLACE
Processing: THE CREST
Processing: PROMENADE PEAK
Processing: QUEENS PEAK
Processing: HIGHLINE RESIDENCES
Processing: STIRLING RESIDENCES
Processing: RIVIERE
Processing: ECHELON
Processing: PRINCIPAL GARDEN
Processing: MARGARET VILLE
Processing: 