## Load libraries

In [46]:
import numpy as np
import requests
import json
import pandas as pd

## Read in file

In [47]:
# Load the bin-centre inspections CSV into `bible`.
bible = pd.read_csv(filepath_or_buffer="BinCentresInspections_Mar_Dec_2020_XY.csv")

In [48]:
# Print the schema, then make sure both coordinate columns are floats so they
# can be fed to np.radians when the search tree is built.
# (A bare `bible.head()` in the middle of a cell is never displayed, so it is
# omitted here.)
bible.info()
bible = bible.astype({"Latitude": float, "Longitude": float})

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3243 entries, 0 to 3242
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   address         3243 non-null   object 
 1   Latitude        3243 non-null   float64
 2   Longitude       3243 non-null   float64
 3   Postal Code     3238 non-null   object 
 4   Town Council    3243 non-null   object 
 5   Date Of Survey  3243 non-null   object 
 6   x_new           3242 non-null   float64
 7   y_new           3242 non-null   float64
dtypes: float64(4), object(4)
memory usage: 202.8+ KB


Do some data cleaning

In [49]:
# Keep one row per unique (address, Latitude, Longitude) combination.
# reset_index() renumbers the rows and stores the previous row labels in a
# new "index" column.
bible_set = (
    bible
    .drop_duplicates(subset=["address", "Latitude", "Longitude"])
    .reset_index()
)
bible_set.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2417 entries, 0 to 2416
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   index           2417 non-null   int64  
 1   address         2417 non-null   object 
 2   Latitude        2417 non-null   float64
 3   Longitude       2417 non-null   float64
 4   Postal Code     2413 non-null   object 
 5   Town Council    2417 non-null   object 
 6   Date Of Survey  2417 non-null   object 
 7   x_new           2416 non-null   float64
 8   y_new           2416 non-null   float64
dtypes: float64(4), int64(1), object(4)
memory usage: 170.1+ KB


Write a function to convert XY coordinates (EPSG:3414) to latitude and longitude (EPSG:4326)

In [50]:
#need to do the conversion to get lat/long for the old file
def xy_to_latlong(x, y, session=None, timeout=10):
    """Convert an EPSG:3414 X/Y pair to EPSG:4326 latitude/longitude.

    The conversion is delegated to the OneMap ``3414to4326`` web endpoint.

    Parameters
    ----------
    x, y : float
        Projected coordinates, sent as the ``X`` and ``Y`` query parameters.
    session : object, optional
        Any object exposing a ``requests``-style
        ``get(url, params=..., timeout=...)`` method, e.g. a
        ``requests.Session`` to reuse one connection when converting many
        rows. Defaults to the ``requests`` module.
    timeout : float, optional
        Seconds to wait for the service before giving up.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` as returned by the service, or
        ``(np.nan, np.nan)`` when an input is missing or the response does
        not contain both coordinates.

    Raises
    ------
    Exception
        Whatever the HTTP client raises on network failure or a non-2xx
        status (``requests.RequestException`` subclasses with the default
        client).
    """
    missing = (np.nan, np.nan)

    # A missing coordinate can never be converted; skip the network round trip.
    if pd.isna(x) or pd.isna(y):
        return missing

    http = requests if session is None else session
    resp = http.get(
        "https://developers.onemap.sg/commonapi/convert/3414to4326",
        params={"X": x, "Y": y},
        timeout=timeout,
    )
    resp.raise_for_status()
    data = resp.json()

    # A non-empty payload without the coordinate keys (e.g. an error message)
    # would pass a plain truthiness check and then fail with KeyError, so test
    # for the keys explicitly.
    if isinstance(data, dict) and "latitude" in data and "longitude" in data:
        return (data["latitude"], data["longitude"])
    return missing

In [51]:
# Spot-check the converter on a single coordinate pair.
xy_to_latlong(x=27454.84598, y=45916.47028)

(1.4315267319894855, 103.82841985785846)

Read in the new bin centres dataset

In [9]:
# Treat "NA", empty strings and "-" as missing values while loading.
masterframe = pd.read_csv(
    "New_Bin_Centres_2.csv",
    na_values=["NA", "", "-"],
)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [11]:
#filter to binary check = False and drop rows with postal_check = NA
# Keep rows whose binary_check is False and whose postal_check is present,
# then keep one row per (X.output, Y.output) pair. Each step returns a new
# frame, so nothing is modified in place on a slice of `masterframe`
# (in-place edits on a slice are what raise SettingWithCopyWarning).
input_file = (
    masterframe.loc[masterframe["binary_check"] == False]
    .dropna(subset=["postal_check"])
    .drop_duplicates(subset=["X.output", "Y.output"])
)
# Last expression in the cell, so the resulting shape is actually displayed.
input_file.shape

(1083, 68)

The above code and the conversion from XY to lat/long can take a while. I've already saved the results, so you can load them from this file instead.

In [76]:
# Load the previously saved results instead of recomputing them.
input_file = pd.read_csv(filepath_or_buffer="input_new.csv")

In [77]:
# Show (rows, columns) of `input_file` as a quick sanity check.
input_file.shape

(1083, 71)

In [78]:
# `input_new` is used by every later cell but is never assigned in this
# notebook, so on a fresh kernel this line raised NameError. Derive it from
# `input_file`, keeping one row per (X.output, Y.output) pair. The explicit
# copy lets later cells add columns without SettingWithCopyWarning.
input_new = input_file.drop_duplicates(subset=["X.output", "Y.output"]).copy()

### Nearest-neighbour search (BallTree with haversine distance)

In [55]:
from sklearn.neighbors import BallTree, DistanceMetric

In [56]:
# Metric object handed to the BallTree below.
dist = DistanceMetric.get_metric(metric='haversine')

In [57]:
# Build the search tree over the de-duplicated reference points; the
# coordinates are converted from degrees to radians first.
bible_radians = np.radians(bible_set[["Latitude","Longitude"]])
bible_tree = BallTree(bible_radians, metric=dist)

In [58]:
# Query points in radians, matching the units used to build the tree.
coords = np.deg2rad(input_new.loc[:, ['latitude_new', 'longitude_new']])

In [59]:
# For every query point, fetch the single closest reference point:
# `dists` holds the distances, `ilocs` the row positions within `bible_set`.
dists, ilocs = bible_tree.query(coords, k=1)

In [60]:
# Haversine distances are angular (radians); multiplying by an Earth radius
# of 6367 km turns them into kilometres.
input_new['dist'] = 6367 * dists.ravel()

In [73]:
# Preview the first five reference rows, including the preserved "index" column.
bible_set.head(n=5)

Unnamed: 0,index,address,Latitude,Longitude,Postal Code,Town Council,Date Of Survey,x_new,y_new
0,0,Blk 330 Serangoon Ave 3,1.3499,103.87002,550330,MPTC,30/9/2020 0:00,32084.4946,36890.70402
1,1,Blk 330 Serangoon Ave 3,1.35009,103.87004,550330,SERO,3/6/2020 0:00,32086.72006,36911.71329
2,3,Blk 24 Hougang Avenue 3,1.36488,103.8922,530024,AHTC,24/12/2020,34552.84082,38547.16538
3,4,Blk 24 Hougang Avenue 3,1.36492,103.89212,530024,AHTC,25/8/2020 0:00,34543.93766,38551.58816
4,5,Blk 319 Jurong East Street 31,1.3481,103.73047,600319,SWRO,11/5/2020 0:00,16554.12724,36691.88018


In [64]:
# The tree returns row positions; map them back to the values stored in the
# "index" column so they can be used as a join key.
nearest_positions = ilocs.flatten()
input_new['nearest_bin'] = bible_set['index'].iloc[nearest_positions].values

Merge the two datasets on the nearest-bin key

In [65]:
# Attach the matched reference row to every query row (inner join, the
# pandas default). Columns present in both frames get _x / _y suffixes.
nearest_bin = input_new.merge(
    bible_set,
    how="inner",
    left_on=["nearest_bin"],
    right_on=["index"],
)

In [75]:
# Preview the query address, the distance and the matched reference key.
nearest_bin.loc[:, ["address_x","dist","nearest_bin"]].head()

Unnamed: 0,address_x,dist,nearest_bin
0,Blk 112 Yishun Ring Road,0.097023,385
1,Blk 136 Yishun Ring Road,0.064872,537
2,Blk 136 Yishun Ring Rd,0.064872,537
3,Blk 133 Yishun St 11,0.085142,537
4,Blk 136 Yishun St 11,0.064872,537


In [72]:
# Persist the merged result without writing the row index.
nearest_bin.to_csv(path_or_buf="nearest_bin_kdtree.csv", index=False)