In [1]:
import pandas as pd
from geopy.distance import geodesic
import numpy as np

In [2]:
#load both dataframe with hdb gps lat,long and
#school csv
df_unique = pd.read_csv('data/unique_address_mrt.csv')
df_school = pd.read_csv('data/topschools.csv', encoding = "ISO-8859-1")

In [3]:
df_unique.head()

Unnamed: 0,address,latitude,longitude,mrt,nearest_mrt_distance,mrt_less_than_half_km,mrt_less_than_one_km
0,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2
1,216 ang mo kio ave 1,1.366197,103.841505,mayflower mrt,0.800629,0,2
2,211 ang mo kio ave 3,1.369197,103.841667,mayflower mrt,0.620303,0,2
3,202 ang mo kio ave 3,1.368446,103.844516,ang mo kio mrt,0.580039,0,2
4,235 ang mo kio ave 3,1.366824,103.836491,mayflower mrt,0.513092,0,2


# #based on https://numberoneproperty.com/top-10-tips-for-getting-into-good-school-in-singapore/

In [4]:
df_school.head()

Unnamed: 0,schools,latitude,longitude
0,CHIJ St. Nicholas Girls' School,,
1,Catholic High School,,
2,Nan Hua Primary School,,
3,Nanyang Primary School,,
4,Pei Hwa Presbyterian Primary School,,


We consider schools =<1 km and also between 1 and 2km <br>
https://www.99.co/blog/singapore/p1-registration-strategy-distance/

In [5]:
import json
import requests

def get_gps(df,col):
    '''
    This function will obtain the latitude and longitude using oneMAP api
    
    Keyword arguments:
    df : df name
    col : name of the column to retrieve
    '''
    locations = list(df[col])
    latitude_list = []
    longitude_list = []
    for location in locations:
        url= "https://developers.onemap.sg/commonapi/search?returnGeom=Y&getAddrDetails=Y&pageNum=1&searchVal="+location
        response = requests.get(url)
        data = json.loads(response.text) 
        try:
            latitude_list.append(data['results'][0]['LATITUDE'])
            longitude_list.append(data['results'][0]['LONGITUDE'])
          
        except:
            latitude_list.append(None)
            longitude_list.append(None)
    
    df["latitude"] = latitude_list
    df["longitude"] = longitude_list
    
    return df

In [6]:
# obtain MRT GPS
df_school = get_gps(df_school,'schools')
df_school.head()
df_school.to_csv('data/school_done.csv',index=False)

In [7]:
# Manual input for CHIJ St. Nicholas Girls' School
df_school.loc[0,'latitude']=1.3740364
df_school.loc[0,'longitude']=103.8319816

In [8]:
df_school.head()

Unnamed: 0,schools,latitude,longitude
0,CHIJ St. Nicholas Girls' School,1.37404,103.832
1,Catholic High School,1.3547888769999998,103.8449341
2,Nan Hua Primary School,1.319836638,103.761404
3,Nanyang Primary School,1.32111549,103.8064681
4,Pei Hwa Presbyterian Primary School,1.338055078,103.7761082


In [9]:
df_unique = pd.read_csv('data/unique_address_mrt.csv')

In [10]:
def nearest_loc(lat,long,df_loc_compare):
    '''
    This function returns 
    nearest location,nearest location distance, 
    number of loc that is less than or equals to one km.
    
    Keyword arguments:
    lat -- latitude of the location
    long -- longitude of the location
    df_loc_compare -- (target) name of the df(i.e mrt or shopping mall)
                      column 'latitude' and 'longitude' must exist
       
    '''
    flat_loc = (lat,long)
    loc_dict = {}
    for _,row in df_loc_compare.iterrows():     
        mrt_loc=(row['latitude'],row['longitude'])
        loc_dict[row['schools']] = geodesic(flat_loc,mrt_loc).km           
    less_than_one_km    = len([loc for loc,distance in loc_dict.items() if distance <= 1])
    nearest_loc = min(loc_dict, key=loc_dict.get)
    nearest_loc_distance = min(loc_dict.values())
    return nearest_loc,nearest_loc_distance,less_than_one_km

In [11]:
df_unique['school'], df_unique['nearest_school_distance'],\
df_unique['school_less_than_one_km']\
= zip(*df_unique.apply(lambda x : nearest_loc(x['latitude'],x['longitude'],df_school),axis=1))

In [12]:
df_unique.head()

Unnamed: 0,address,latitude,longitude,mrt,nearest_mrt_distance,mrt_less_than_half_km,mrt_less_than_one_km,school,nearest_school_distance,school_less_than_one_km
0,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,CHIJ St. Nicholas Girls' School,1.043554,0
1,216 ang mo kio ave 1,1.366197,103.841505,mayflower mrt,0.800629,0,2,Catholic High School,1.31789,0
2,211 ang mo kio ave 3,1.369197,103.841667,mayflower mrt,0.620303,0,2,CHIJ St. Nicholas Girls' School,1.203351,0
3,202 ang mo kio ave 3,1.368446,103.844516,ang mo kio mrt,0.580039,0,2,Catholic High School,1.510899,0
4,235 ang mo kio ave 3,1.366824,103.836491,mayflower mrt,0.513092,0,2,CHIJ St. Nicholas Girls' School,0.942316,1


In [13]:
df_unique.isnull().sum()

address                    0
latitude                   0
longitude                  0
mrt                        0
nearest_mrt_distance       0
mrt_less_than_half_km      0
mrt_less_than_one_km       0
school                     0
nearest_school_distance    0
school_less_than_one_km    0
dtype: int64

In [14]:
df_unique.to_csv('data/unique_address_mrt_school.csv',index=False)

In [15]:
columns = ['address','school','nearest_school_distance', 'school_less_than_one_km']

In [16]:
df_unique=df_unique[columns]

In [17]:
df_unique.head()

Unnamed: 0,address,school,nearest_school_distance,school_less_than_one_km
0,309 ang mo kio ave 1,CHIJ St. Nicholas Girls' School,1.043554,0
1,216 ang mo kio ave 1,Catholic High School,1.31789,0
2,211 ang mo kio ave 3,CHIJ St. Nicholas Girls' School,1.203351,0
3,202 ang mo kio ave 3,Catholic High School,1.510899,0
4,235 ang mo kio ave 3,CHIJ St. Nicholas Girls' School,0.942316,1


In [18]:
df = pd.read_csv('data/address_mrt.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [19]:
df.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,...,year_sold,month_sold,remaining_lease,address,latitude,longitude,mrt,nearest_mrt_distance,mrt_less_than_half_km,mrt_less_than_one_km
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,improved,1977,9000.0,...,1990,1,86,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,improved,1977,6000.0,...,1990,1,86,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,improved,1977,8000.0,...,1990,1,86,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,improved,1977,6000.0,...,1990,1,86,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2
4,1990-02,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,improved,1977,8000.0,...,1990,2,86,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2


In [20]:
# merge df_unique to df . saves us a lot of time
df = pd.merge(df, df_unique, on='address',how='left')

In [21]:
#check if there is any null value
df.isnull().sum()

month                      0
town                       0
flat_type                  0
block                      0
street_name                0
storey_range               0
floor_area_sqm             0
flat_model                 0
lease_commence_date        0
resale_price               0
date                       0
year_sold                  0
month_sold                 0
remaining_lease            0
address                    0
latitude                   0
longitude                  0
mrt                        0
nearest_mrt_distance       0
mrt_less_than_half_km      0
mrt_less_than_one_km       0
school                     0
nearest_school_distance    0
school_less_than_one_km    0
dtype: int64

In [22]:
df.to_csv('data/address_mrt_school.csv',index=False)