In [1]:
import pandas as pd
from geopy.distance import geodesic
import numpy as np

In [2]:
# Load the table of unique HDB addresses. Per the preview of this frame shown
# below, it already carries each address's latitude/longitude and MRT features.
df_unique = pd.read_csv('data/unique_address_mrt.csv')

In [3]:
# Geodesic distance in km from every HDB address to the CBD.
# Raffles Place MRT is used as the CBD reference point.
raffles_place_mrt = (1.284125148,103.8514384)


def _km_to_cbd(row):
    '''Return the geodesic distance (km) between one address row and the CBD.'''
    address_loc = (row['latitude'],row['longitude'])
    return geodesic(raffles_place_mrt,address_loc).km


df_unique['CBD_distance'] = df_unique.apply(_km_to_cbd,axis=1)

In [4]:
df_unique.head()

Unnamed: 0,address,latitude,longitude,mrt,nearest_mrt_distance,mrt_less_than_half_km,mrt_less_than_one_km,CBD_distance
0,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,9.200287
1,216 ang mo kio ave 1,1.366197,103.841505,mayflower mrt,0.800629,0,2,9.142146
2,211 ang mo kio ave 3,1.369197,103.841667,mayflower mrt,0.620303,0,2,9.469459
3,202 ang mo kio ave 3,1.368446,103.844516,ang mo kio mrt,0.580039,0,2,9.355586
4,235 ang mo kio ave 3,1.366824,103.836491,mayflower mrt,0.513092,0,2,9.294443


In [5]:
import json
import requests

def get_gps(df,col):
    '''
    Look up the latitude and longitude of every value in df[col] with the
    OneMap search API and store them on df.

    Keyword arguments:
    df : DataFrame holding the names/addresses to look up. It is modified in
         place: columns "latitude" and "longitude" are added or overwritten.
    col : name of the column whose values are sent as the search term

    Returns:
    The same df object. Each row gets the coordinates of the first search
    result as floats, or None when the value is missing or the response
    contains no usable result.

    Raises:
    requests.RequestException if the HTTP request itself fails or times out.
    '''
    search_url = "https://developers.onemap.sg/commonapi/search"
    latitude_list = []
    longitude_list = []
    for location in df[col]:
        latitude = longitude = None
        # A missing value cannot be searched for; leave its coordinates as None.
        if pd.notna(location):
            # Let requests build the query string so characters such as
            # spaces, '&' or '#' in the search value are encoded instead of
            # corrupting the URL.
            params = {
                "returnGeom": "Y",
                "getAddrDetails": "Y",
                "pageNum": 1,
                "searchVal": location,
            }
            # The timeout stops a stalled connection from hanging the notebook.
            response = requests.get(search_url, params=params, timeout=30)
            try:
                first_hit = json.loads(response.text)['results'][0]
                # float() keeps the columns numeric even if the API returns
                # the coordinates as text; it is a no-op for numbers.
                latitude = float(first_hit['LATITUDE'])
                longitude = float(first_hit['LONGITUDE'])
            except (ValueError, KeyError, IndexError, TypeError):
                # Non-JSON body, no results, or malformed coordinates:
                # record the row as "not found" rather than aborting the run.
                latitude = longitude = None
        latitude_list.append(latitude)
        longitude_list.append(longitude)

    df["latitude"] = latitude_list
    df["longitude"] = longitude_list

    return df

In [6]:
df_mall= pd.read_csv('data/mall.csv')

In [7]:
df_mall = get_gps(df_mall,'mall')

In [8]:
df_mall.head()

Unnamed: 0,mall,latitude,longitude
0,100 AM,1.274588218,103.8434707
1,313@Somerset,1.301385102,103.8376844
2,Aperia,1.30974242,103.8641016
3,Balestier Hill Shopping Centre,1.326124169,103.8437095
4,Bugis Cube,1.298195005,103.8556555


In [9]:
df_mall.loc[df_mall.isnull()['latitude'],:]

Unnamed: 0,mall,latitude,longitude
9,Clarke Quay the Central,,
43,pomo retail,,
99,Jubilee Square,,


In [10]:
# The API lookup returned no coordinates for the three malls listed above
# (index labels 9, 43 and 99), so fill them in by hand.
df_mall.loc[9,['latitude','longitude']] = [1.2890306,103.8444211]
df_mall.loc[43,['latitude','longitude']] = [1.300219,103.8469757]
df_mall.loc[99,['latitude','longitude']] = [1.3717079,103.8454328]


In [11]:
df_mall.isnull().sum()

mall         0
latitude     0
longitude    0
dtype: int64

In [12]:
def nearest_loc(lat,long,df_loc_compare,name_col='mall',radius_km=1):
    '''
    This function returns
    nearest location, nearest location distance (km),
    number of locations that are less than or equal to radius_km away.

    Keyword arguments:
    lat -- latitude of the location
    long -- longitude of the location
    df_loc_compare -- (target) df to compare against (i.e mrt or shopping mall)
                      column 'latitude' and 'longitude' must exist
    name_col -- column of df_loc_compare holding the location names
                (default 'mall')
    radius_km -- radius in km used for the "nearby" count (default 1)

    Raises:
    ValueError if df_loc_compare has no rows.
    '''
    flat_loc = (lat,long)
    # Keep one (name, distance) pair per row instead of a dict keyed by name,
    # so two rows sharing a name cannot overwrite each other.
    distances = [
        (name,geodesic(flat_loc,(target_lat,target_long)).km)
        for name,target_lat,target_long in zip(
            df_loc_compare[name_col],
            df_loc_compare['latitude'],
            df_loc_compare['longitude'],
        )
    ]
    if not distances:
        raise ValueError("df_loc_compare has no rows to compare against")

    # min() returns the first pair on ties, i.e. the earliest row in the frame.
    closest_name,closest_distance = min(distances,key=lambda pair: pair[1])
    within_radius = sum(1 for _,distance in distances if distance <= radius_km)

    return closest_name,closest_distance,within_radius

In [13]:
# nearest_loc yields one (name, distance, count) tuple per address row;
# transpose those tuples into three column-sized sequences.
mall_features = df_unique.apply(
    lambda row: nearest_loc(row['latitude'],row['longitude'],df_mall),
    axis=1,
)
nearest_names,nearest_distances,malls_within_one_km = zip(*mall_features)
df_unique['mall'] = nearest_names
df_unique['nearest_mall_distance'] = nearest_distances
df_unique['mall_less_than_one_km'] = malls_within_one_km

In [14]:
df_unique.head()

Unnamed: 0,address,latitude,longitude,mrt,nearest_mrt_distance,mrt_less_than_half_km,mrt_less_than_one_km,CBD_distance,mall,nearest_mall_distance,mall_less_than_one_km
0,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,9.200287,Jubilee Square,1.130965,0
1,216 ang mo kio ave 1,1.366197,103.841505,mayflower mrt,0.800629,0,2,9.142146,Jubilee Square,0.749952,3
2,211 ang mo kio ave 3,1.369197,103.841667,mayflower mrt,0.620303,0,2,9.469459,Jubilee Square,0.502752,3
3,202 ang mo kio ave 3,1.368446,103.844516,ang mo kio mrt,0.580039,0,2,9.355586,Jubilee Square,0.374781,3
4,235 ang mo kio ave 3,1.366824,103.836491,mayflower mrt,0.513092,0,2,9.294443,Jubilee Square,1.132208,0


In [15]:
df_unique.isnull().sum()

address                  0
latitude                 0
longitude                0
mrt                      0
nearest_mrt_distance     0
mrt_less_than_half_km    0
mrt_less_than_one_km     0
CBD_distance             0
mall                     0
nearest_mall_distance    0
mall_less_than_one_km    0
dtype: int64

In [16]:
df_unique.to_csv('data/unique_address_mrt_CBD_mall.csv',index=False)

In [17]:
# Merge key ('address') plus only the CBD and mall features computed in this
# notebook. The latitude/longitude/MRT columns are left out; the transaction
# table merged further down already lists them among its columns.
columns = ['address','CBD_distance','mall','nearest_mall_distance', 'mall_less_than_one_km']

In [18]:
df_unique=df_unique[columns]

In [19]:
df_unique.head()

Unnamed: 0,address,CBD_distance,mall,nearest_mall_distance,mall_less_than_one_km
0,309 ang mo kio ave 1,9.200287,Jubilee Square,1.130965,0
1,216 ang mo kio ave 1,9.142146,Jubilee Square,0.749952,3
2,211 ang mo kio ave 3,9.469459,Jubilee Square,0.502752,3
3,202 ang mo kio ave 3,9.355586,Jubilee Square,0.374781,3
4,235 ang mo kio ave 3,9.294443,Jubilee Square,1.132208,0


In [20]:
# Load the resale-transaction table, which already has the MRT and school
# features attached (see the column listing below).
# NOTE(review): the truncated warning in this cell's output looks like the tail
# of a pandas DtypeWarning (mixed-type column) -- confirm, and if so consider
# passing an explicit dtype= to read_csv.
df = pd.read_csv('data/address_mrt_school.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [21]:
df.head()

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,...,address,latitude,longitude,mrt,nearest_mrt_distance,mrt_less_than_half_km,mrt_less_than_one_km,school,nearest_school_distance,school_less_than_one_km
0,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,improved,1977,9000.0,...,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,Catholic High School,1.527978,0
1,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,improved,1977,6000.0,...,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,Catholic High School,1.527978,0
2,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,improved,1977,8000.0,...,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,Catholic High School,1.527978,0
3,1990-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,07 TO 09,31.0,improved,1977,6000.0,...,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,Catholic High School,1.527978,0
4,1990-02,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,04 TO 06,31.0,improved,1977,8000.0,...,309 ang mo kio ave 1,1.366045,103.83697,bright hill mrt,0.546874,0,2,Catholic High School,1.527978,0


In [22]:
df.columns

Index(['month', 'town', 'flat_type', 'block', 'street_name', 'storey_range',
       'floor_area_sqm', 'flat_model', 'lease_commence_date', 'resale_price',
       'date', 'year_sold', 'month_sold', 'remaining_lease', 'address',
       'latitude', 'longitude', 'mrt', 'nearest_mrt_distance',
       'mrt_less_than_half_km', 'mrt_less_than_one_km', 'school',
       'nearest_school_distance', 'school_less_than_one_km'],
      dtype='object')

In [23]:
# Attach the per-address CBD and mall features to every transaction row.
# Computing them once per unique address and merging saves a lot of time
# compared with recomputing the distances for every row of df.
# validate='many_to_one' makes pandas raise MergeError if df_unique ever holds
# a repeated address, which would otherwise silently duplicate transaction
# rows in this left join.
df = pd.merge(df, df_unique, on='address',how='left',validate='many_to_one')

In [24]:
#check if there is any null value
df.isnull().sum()

month                      0
town                       0
flat_type                  0
block                      0
street_name                0
storey_range               0
floor_area_sqm             0
flat_model                 0
lease_commence_date        0
resale_price               0
date                       0
year_sold                  0
month_sold                 0
remaining_lease            0
address                    0
latitude                   0
longitude                  0
mrt                        0
nearest_mrt_distance       0
mrt_less_than_half_km      0
mrt_less_than_one_km       0
school                     0
nearest_school_distance    0
school_less_than_one_km    0
CBD_distance               0
mall                       0
nearest_mall_distance      0
mall_less_than_one_km      0
dtype: int64

In [25]:
df.to_csv('data/address_mrt_school_cbd_mall.csv',index=False)

In [None]:
#end