### Getting schools within 1km radius of HDB flat
In progress: extending the same search to schools within a 2km radius of each flat.

We will first load the `hdb_working_data.csv` file

In [39]:
import sys
sys.path.append('../api')

import pandas as pd
from datetime import datetime
import geopandas as gpd
from shapely.geometry import Point

# Load the working HDB resale dataset that the school columns will be added to.
hdb_cleaned_df = pd.read_csv('../data/modified/hdb_working_data.csv')
# Preview the first rows (address, latitude and longitude are used further down).
hdb_cleaned_df.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,resale_price,address,latitude,longitude,sora,mrt_stations_within_1km,nearest_mrt_station,bto_within_4km,bto_supply_within_4km
0,2015-01,ANG MO KIO,3 ROOM,174,ANG MO KIO AVE 4,07 TO 09,60.0,Improved,1986,70.0,255000.0,174 ANG MO KIO AVE 4,1.375097,103.837619,0.129019,[],Yio Chu Kang MRT Station (1099.56m),12,6587
1,2015-01,ANG MO KIO,3 ROOM,541,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1981,65.0,275000.0,541 ANG MO KIO AVE 10,1.373922,103.855621,0.129019,['Ang Mo Kio MRT Station (811.53m)'],Ang Mo Kio MRT Station (811.53m),39,23252
2,2015-01,ANG MO KIO,3 ROOM,163,ANG MO KIO AVE 4,01 TO 03,69.0,New Generation,1980,64.0,285000.0,163 ANG MO KIO AVE 4,1.373552,103.838169,0.129019,[],Yio Chu Kang MRT Station (1183.8m),10,4941
3,2015-01,ANG MO KIO,3 ROOM,446,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1979,63.0,290000.0,446 ANG MO KIO AVE 10,1.367761,103.855357,0.129019,['Ang Mo Kio MRT Station (703.32m)'],Ang Mo Kio MRT Station (703.32m),34,20043
4,2015-01,ANG MO KIO,3 ROOM,557,ANG MO KIO AVE 10,07 TO 09,68.0,New Generation,1980,64.0,290000.0,557 ANG MO KIO AVE 10,1.371626,103.857736,0.129019,['Ang Mo Kio MRT Station (939.42m)'],Ang Mo Kio MRT Station (939.42m),45,26356


Then we load the `schools` dataset

In [40]:
# Load the primary school table (name, SAP, GEP, Affiliated_Secondary, latitude, longitude).
# NOTE(review): pri_schools is read again later in this notebook from
# '../data/modified/primary_school_coordinates.csv', which rebinds this name —
# confirm which copy of the file is the intended source.
pri_schools = pd.read_csv('../data/PointsOfInterest/primary_school_coordinates.csv')
pri_schools.head()

Unnamed: 0.1,Unnamed: 0,name,SAP,GEP,Affiliated_Secondary,latitude,longitude
0,0,Admiralty Primary School,0,0,0,1.442635,103.80004
1,1,Ahmad Ibrahim Primary School,0,0,0,1.433153,103.832942
2,2,Ai Tong School,1,0,0,1.360583,103.83302
3,3,Alexandra Primary School,0,0,0,1.291334,103.824425
4,4,Anchor Green Primary School,0,0,0,1.39037,103.887165


Next, we create a function that calculates the distance between two coordinates. It is used later to compare each flat against every school in `primary_school_coordinates.csv`.

**Assumption:** For simplicity, we assume that primary school locations have not changed since 2015.

In [41]:
# Distance between two (latitude, longitude) points, in kilometres
def calculate_geospatial_distance(start_coords, end_coords):
    """Return the projected distance in kilometres between two coordinates.

    Parameters
    ----------
    start_coords, end_coords : tuple
        ``(latitude, longitude)`` pairs, interpreted as EPSG:4326.

    Returns
    -------
    The distance measured after reprojecting both points to EPSG:3395
    (World Mercator, metre units), divided by 1000.

    NOTE(review): Mercator distances are stretched away from the equator, so
    this is an approximation — presumably acceptable for the low-latitude
    coordinates used in this notebook; confirm before reusing elsewhere.
    """

    def _project(coords):
        # shapely points take (x, y), i.e. (longitude, latitude)
        latitude, longitude = coords
        lonlat_series = gpd.GeoSeries([Point(longitude, latitude)], crs="EPSG:4326")
        return lonlat_series.to_crs("EPSG:3395")

    projected_start = _project(start_coords)
    projected_end = _project(end_coords)

    # Both series hold exactly one point, so the element-wise distance has one value
    metres = projected_start.distance(projected_end).iloc[0]

    return metres/1000

# Example usage: sanity-check the helper on two hard-coded (latitude, longitude) pairs
start_coords = (1.352083, 103.819836) 
end_coords = (1.290270, 103.851959)  

# The helper returns kilometres, so the value is printed with that unit
distance = calculate_geospatial_distance(start_coords, end_coords)
print(f"Distance: {distance} kilometers")

Distance: 7.715477804836227 kilometers


### Getting Primary Schools within 1km of flat address

In [42]:
# Load the primary school coordinates dataset (the copy under data/modified).
# This rebinds pri_schools, replacing the frame read earlier from data/PointsOfInterest.
pri_schools = pd.read_csv('../data/modified/primary_school_coordinates.csv')
pri_schools.head()

Unnamed: 0.1,Unnamed: 0,name,SAP,GEP,Affiliated_Secondary,latitude,longitude
0,0,Admiralty Primary School,0,0,0,1.442635,103.80004
1,1,Ahmad Ibrahim Primary School,0,0,0,1.433153,103.832942
2,2,Ai Tong School,1,0,0,1.360583,103.83302
3,3,Alexandra Primary School,0,0,0,1.291334,103.824425
4,4,Anchor Green Primary School,0,0,0,1.39037,103.887165


Now we count the primary schools within 1km of each flat and collect their names (with distances) in a list for each flat.

In [43]:
def get_schools_within_1km(flat_coordinates, pri_schools_df, radius_km=1):
    """List the primary schools that lie within a given radius of a flat.

    Parameters
    ----------
    flat_coordinates : tuple
        ``(latitude, longitude)`` of the flat.
    pri_schools_df : pandas.DataFrame
        Must provide ``name``, ``latitude`` and ``longitude`` columns.
    radius_km : float, default 1
        Search radius in kilometres. The bound is exclusive, so a school at
        exactly ``radius_km`` is not included. The default keeps the original
        1km behaviour; pass ``2`` for a 2km search.

    Returns
    -------
    tuple
        ``(count, schools)`` where ``schools`` is a list of strings formatted
        as ``"<name> (<distance in metres, 2 d.p.>m)"`` in the row order of
        ``pri_schools_df``, and ``count == len(schools)``.
    """
    schools = []
    # Walk only the three needed columns in lockstep; DataFrame.iterrows()
    # would build a full Series object for every school row.
    for school, sch_latitude, sch_longitude in zip(
        pri_schools_df['name'],
        pri_schools_df['latitude'],
        pri_schools_df['longitude'],
    ):
        coordinates = (float(sch_latitude), float(sch_longitude))

        # calculate_geospatial_distance returns kilometres
        distance = calculate_geospatial_distance(flat_coordinates, coordinates)
        if 0 <= distance < radius_km:
            # Report the distance in metres alongside the school name
            schools.append(f"{school} ({distance * 1000:.2f}m)")
    return (len(schools), schools)

# Sample: get_schools_within_1km((1.37509746867904, 103.83761896123), pri_schools)

In [44]:
def create_prisch_within_1km_columns(hdb_cleaned_df, pri_sch_df):
    """Add the nearby-primary-school columns to the flat DataFrame.

    For every row, ``get_schools_within_1km`` is consulted with the row's
    ``(latitude, longitude)``. Results are remembered per ``address`` so that
    repeated addresses reuse the first computed result instead of recomputing.

    Parameters
    ----------
    hdb_cleaned_df : pandas.DataFrame
        Must provide ``address``, ``latitude`` and ``longitude`` columns.
        It is modified in place.
    pri_sch_df : pandas.DataFrame
        School table passed through to ``get_schools_within_1km``.

    Returns
    -------
    pandas.DataFrame
        The same ``hdb_cleaned_df`` object with two added columns:
        ``pri_schs_within_1km`` (list of school descriptions) and
        ``count_pri_schs_within_1km`` (number of schools).
    """
    results_by_address = {}
    counts_per_row = []
    lists_per_row = []

    row_fields = zip(
        hdb_cleaned_df['address'],
        hdb_cleaned_df['latitude'],
        hdb_cleaned_df['longitude'],
    )
    for address, latitude, longitude in row_fields:
        # Only hit the school search the first time an address is seen
        if address not in results_by_address:
            flat_coordinates = (float(latitude), float(longitude))
            num_schools, nearest_school_list = get_schools_within_1km(flat_coordinates, pri_sch_df)
            results_by_address[address] = (num_schools, nearest_school_list)

        cached_count, cached_list = results_by_address[address]
        counts_per_row.append(cached_count)
        lists_per_row.append(cached_list)

    hdb_cleaned_df['pri_schs_within_1km'] = lists_per_row
    hdb_cleaned_df['count_pri_schs_within_1km'] = counts_per_row
    return hdb_cleaned_df

# Add 'pri_schs_within_1km' and 'count_pri_schs_within_1km' to every flat row
hdb_cleaned_df = create_prisch_within_1km_columns(hdb_cleaned_df, pri_schools)
# Preview the first 10 rows to inspect the two new columns
hdb_cleaned_df.head(10)

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,...,address,latitude,longitude,sora,mrt_stations_within_1km,nearest_mrt_station,bto_within_4km,bto_supply_within_4km,pri_schs_within_1km,count_pri_schs_within_1km
0,2015-01,ANG MO KIO,3 ROOM,174,ANG MO KIO AVE 4,07 TO 09,60.0,Improved,1986,70.0,...,174 ANG MO KIO AVE 4,1.375097,103.837619,0.129019,[],Yio Chu Kang MRT Station (1099.56m),12,6587,"[Ang Mo Kio Primary School (676.95m), CHIJ St....",3
1,2015-01,ANG MO KIO,3 ROOM,541,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1981,65.0,...,541 ANG MO KIO AVE 10,1.373922,103.855621,0.129019,['Ang Mo Kio MRT Station (811.53m)'],Ang Mo Kio MRT Station (811.53m),39,23252,[Jing Shan Primary School (431.03m)],1
2,2015-01,ANG MO KIO,3 ROOM,163,ANG MO KIO AVE 4,01 TO 03,69.0,New Generation,1980,64.0,...,163 ANG MO KIO AVE 4,1.373552,103.838169,0.129019,[],Yio Chu Kang MRT Station (1183.8m),10,4941,"[Ang Mo Kio Primary School (495.36m), CHIJ St....",3
3,2015-01,ANG MO KIO,3 ROOM,446,ANG MO KIO AVE 10,01 TO 03,68.0,New Generation,1979,63.0,...,446 ANG MO KIO AVE 10,1.367761,103.855357,0.129019,['Ang Mo Kio MRT Station (703.32m)'],Ang Mo Kio MRT Station (703.32m),34,20043,"[Jing Shan Primary School (611.18m), Teck Ghee...",3
4,2015-01,ANG MO KIO,3 ROOM,557,ANG MO KIO AVE 10,07 TO 09,68.0,New Generation,1980,64.0,...,557 ANG MO KIO AVE 10,1.371626,103.857736,0.129019,['Ang Mo Kio MRT Station (939.42m)'],Ang Mo Kio MRT Station (939.42m),45,26356,[Jing Shan Primary School (627.43m)],1
5,2015-01,ANG MO KIO,3 ROOM,603,ANG MO KIO AVE 5,07 TO 09,67.0,New Generation,1980,64.0,...,603 ANG MO KIO AVE 5,1.380201,103.835756,0.129019,[],Yio Chu Kang MRT Station (1026.13m),16,9077,"[Anderson Primary School (771.81m), CHIJ St. N...",3
6,2015-01,ANG MO KIO,3 ROOM,709,ANG MO KIO AVE 8,01 TO 03,68.0,New Generation,1980,64.0,...,709 ANG MO KIO AVE 8,1.371137,103.847662,0.129019,['Ang Mo Kio MRT Station (234.44m)'],Ang Mo Kio MRT Station (234.44m),27,14734,"[Ang Mo Kio Primary School (916.33m), Jing Sha...",3
7,2015-01,ANG MO KIO,3 ROOM,333,ANG MO KIO AVE 1,01 TO 03,68.0,New Generation,1981,65.0,...,333 ANG MO KIO AVE 1,1.361343,103.851699,0.129019,['Ang Mo Kio MRT Station (991.53m)'],Ang Mo Kio MRT Station (991.53m),28,14701,"[Teck Ghee Primary School (476.75m), Townsvill...",2
8,2015-01,ANG MO KIO,3 ROOM,109,ANG MO KIO AVE 4,01 TO 03,67.0,New Generation,1978,62.0,...,109 ANG MO KIO AVE 4,1.370097,103.837688,0.129019,[],Ang Mo Kio MRT Station (1309.39m),8,3717,"[Ang Mo Kio Primary School (232.66m), CHIJ St....",3
9,2015-01,ANG MO KIO,3 ROOM,564,ANG MO KIO AVE 3,13 TO 15,68.0,New Generation,1985,69.0,...,564 ANG MO KIO AVE 3,1.369848,103.859404,0.129019,[],Ang Mo Kio MRT Station (1108.25m),46,27317,[Jing Shan Primary School (851.39m)],1


In [45]:
# Inspect the rows where count_pri_schs_within_1km is 0,
# i.e. flats for which no primary school was found within 1km
hdb_na = hdb_cleaned_df[hdb_cleaned_df['count_pri_schs_within_1km'] == 0]
hdb_na

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,remaining_lease,...,address,latitude,longitude,sora,mrt_stations_within_1km,nearest_mrt_station,bto_within_4km,bto_supply_within_4km,pri_schs_within_1km,count_pri_schs_within_1km
335,2015-01,CENTRAL AREA,3 ROOM,34,UPP CROSS ST,16 TO 18,65.0,Improved,1975,59.00,...,34 UPP CROSS ST,1.286235,103.842303,0.129019,"['Outram Park MRT Station (678.50m)', 'Chinato...",Chinatown MRT Station (223.2m),9,7841,[],0
337,2015-01,CENTRAL AREA,3 ROOM,538,UPP CROSS ST,10 TO 12,68.0,Improved,1978,62.00,...,538 UPP CROSS ST,1.285212,103.845766,0.129019,"['Raffles Place MRT Station (642.80m)', 'Tanjo...",Chinatown MRT Station (210.29m),8,7441,[],0
338,2015-01,CENTRAL AREA,3 ROOM,532,UPP CROSS ST,13 TO 15,68.0,Improved,1980,64.00,...,532 UPP CROSS ST,1.284194,103.845986,0.129019,"['Raffles Place MRT Station (602.41m)', 'Tanjo...",Chinatown MRT Station (249.89m),9,7841,[],0
414,2015-01,CLEMENTI,3 ROOM,602,CLEMENTI WEST ST 1,04 TO 06,67.0,New Generation,1979,63.00,...,602 CLEMENTI WEST ST 1,1.305104,103.766312,0.129019,[],Clementi MRT Station (1134.22m),5,2791,[],0
423,2015-01,CLEMENTI,3 ROOM,729,CLEMENTI WEST ST 2,07 TO 09,82.0,New Generation,1983,67.00,...,729 CLEMENTI WEST ST 2,1.304927,103.763519,0.129019,[],Clementi MRT Station (1163.53m),5,2791,[],0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211269,2024-03,SEMBAWANG,4 ROOM,107A,CANBERRA ST,13 TO 15,93.0,Model A,2018,93.42,...,107A CANBERRA ST,1.449905,103.832979,3.635582,['Canberra MRT Station (839.88m)'],Canberra MRT Station (839.88m),61,42742,[],0
211271,2024-03,SEMBAWANG,4 ROOM,104B,CANBERRA ST,01 TO 03,93.0,Model A,2018,93.42,...,104B CANBERRA ST,1.450074,103.831377,3.635582,['Canberra MRT Station (797.89m)'],Canberra MRT Station (797.89m),63,44282,[],0
211276,2024-03,SEMBAWANG,5 ROOM,119A,CANBERRA CRES,07 TO 09,113.0,Improved,2018,,...,119A CANBERRA CRES,1.446048,103.831125,3.635582,['Canberra MRT Station (365.95m)'],Canberra MRT Station (365.95m),60,41797,[],0
211279,2024-03,SEMBAWANG,5 ROOM,121D,CANBERRA ST,13 TO 15,113.0,Improved,2018,92.92,...,121D CANBERRA ST,1.447038,103.833923,3.635582,['Canberra MRT Station (642.45m)'],Canberra MRT Station (642.45m),58,40257,[],0


In [46]:
# Save the dataframe to csv without writing the index.
# NOTE: this overwrites the same file that was read at the top of the notebook.
# NOTE(review): the list-valued 'pri_schs_within_1km' column will presumably be
# read back as text, like the existing 'mrt_stations_within_1km' column — confirm
# downstream readers parse it accordingly.
hdb_cleaned_df.to_csv('../data/modified/hdb_working_data.csv', index=False)