In [1]:
# importing libraries
import folium
import pandas as pd
import numpy as np
import time
import geocoder

# Restore the frames saved by an earlier geocoding run.
# The store is opened read-only inside a with-block so the HDF5 file handle is
# released as soon as both frames are loaded (the last cell reopens the same file).
with pd.HDFStore('backup.h5', mode='r') as backup:
    streets = backup['streets']
    df_address = backup['address']

In [2]:
# Latitude / longitude of San Francisco, used as the centre of the base map
lat, long = 37.77, -122.42

In [3]:
# Base map centred on the San Francisco coordinates; the bare name renders it
map_centre = [lat, long]
sanfran_map = folium.Map(location=map_centre, zoom_start=12)
sanfran_map

In [4]:
# Load the candidate bus-stop intersections (two street names per row)
streets_csv = "Potential_Bus_Stops.csv"
df_streets = pd.read_csv(streets_csv)

# Preview the first rows
df_streets.head()


Unnamed: 0,Street_One,Street_Two
0,MISSION ST,ITALY AVE
1,MISSION ST,NEW MONTGOMERY ST
2,MISSION ST,01ST ST
3,MISSION ST,20TH ST
4,MISSION ST,FREMONT ST


In [5]:
# shape of the dataset as (rows, columns)
df_streets.shape

(119, 2)

In [6]:
# Shorten the street column names. The renamed frame is rebound to df_streets
# rather than mutating the loaded frame with inplace=True.
df_streets = df_streets.rename(columns={"Street_One": "Str1", "Street_Two": "Str2"})
df_streets.head()

Unnamed: 0,Str1,Str2
0,MISSION ST,ITALY AVE
1,MISSION ST,NEW MONTGOMERY ST
2,MISSION ST,01ST ST
3,MISSION ST,20TH ST
4,MISSION ST,FREMONT ST


In [7]:
# Geocode the intersection in row 0 as a quick check before processing every row.
# The two street names are read by position with .iloc: plain inter[0] on a Series
# with string labels relies on a deprecated positional fallback.
inter = df_streets.iloc[0]
intersection = inter.iloc[0] + ' & ' + inter.iloc[1] + ", San Francisco, US"
result = geocoder.arcgis(intersection)

In [8]:
# Show what the geocoder returned for the test intersection
for label, value in (
    ("Address: ", result.address),
    ("Latitude: ", result.lat),
    ("Longitude: ", result.lng),
):
    print(label, value)
print(result.latlng)

Address:  Mission St & Italy Ave, San Francisco, California, 94112
Latitude:  37.71846
Longitude:  -122.4395325
[37.71846, -122.4395325]


In [9]:
# Geocode every candidate intersection.
# GEOCODE_DELAY_SECONDS is the pause taken before each request. It is 0, so the
# loop is currently not throttled; raise it if the arcgis geocoder starts
# rejecting requests because of its rate limit.
GEOCODE_DELAY_SECONDS = 0

lats, lngs = [], []
for str1, str2 in zip(df_streets.Str1, df_streets.Str2):
    intersection = str1 + ' & ' + str2 + ", San Francisco, US"
    time.sleep(GEOCODE_DELAY_SECONDS)
    result = geocoder.arcgis(intersection)
    # Always append (even None) so both lists stay aligned with df_streets rows
    lats.append(result.lat)
    lngs.append(result.lng)
    if result.lat is None:
        # make failed lookups visible instead of silently storing None
        print("Failed Query:", intersection)
    print(result.lat, result.lng, end='\n\n')

37.71846 -122.4395325

37.787445000000005 -122.400504

37.79000927940522 -122.39726220078478

37.758627000000004 -122.419044

37.790469 -122.3966925

37.76997600000001 -122.41992150000002

37.76906700000001 -122.42004299999999

37.791152999999994 -122.395842

37.745577000000004 -122.4198675

37.733949 -122.4261225

37.71765 -122.4401355

37.78837200000001 -122.399334

37.784043 -122.404833

37.72873800000001 -122.431284

37.714608000000005 -122.4426555

37.742382000000006 -122.42196449999999

37.791846 -122.3949735

37.730772 -122.42925450000001

37.76665500000001 -122.41982250000001

37.741743 -122.422383

37.728234 -122.4317835

37.727712000000004 -122.43234600000001

37.73664000000001 -122.42423249999999

37.709541 -122.450517

37.793240999999995 -122.393196

37.70830608338589 -122.45424798660152

37.762713000000005 -122.41946250000001

37.79000927940522 -122.39726220078478

37.782189 -122.407173

37.716381 -122.44100399999999

37.741005 -122.422842

37.755449999999996 -122.41874250

In [10]:
# sanity check: the geocoding loop appends one latitude and one longitude per row
print(len(lats), len(lngs))

119 119


In [11]:
# Build `streets`: a new frame holding the intersections plus their geocoded
# latitude and longitude (df_streets itself is left unchanged)
streets = df_streets.assign(lat=lats, lng=lngs)
streets.head()

Unnamed: 0,Str1,Str2,lat,lng
0,MISSION ST,ITALY AVE,37.71846,-122.439532
1,MISSION ST,NEW MONTGOMERY ST,37.787445,-122.400504
2,MISSION ST,01ST ST,37.790009,-122.397262
3,MISSION ST,20TH ST,37.758627,-122.419044
4,MISSION ST,FREMONT ST,37.790469,-122.396693


In [12]:
# initialize a Feature Group for the intersections
intersections = folium.map.FeatureGroup()

# One circle marker per geocoded intersection.
# Rows without coordinates (failed lookups) are skipped, since a marker needs a
# location. The loop variables are deliberately not named `lat`: that name holds
# the map-centre latitude defined near the top of the notebook.
located_streets = streets.dropna(subset=['lat', 'lng'])
for street_lat, street_lng in zip(located_streets.lat, located_streets.lng):
    marker = folium.features.CircleMarker(
        [street_lat, street_lng],
        radius=5,  # how big the circle markers are drawn
        color='yellow',
        fill=True,
        fill_color='blue',
        fill_opacity=0.6,
    )
    intersections.add_child(marker)

# add intersections to the map
sanfran_map.add_child(intersections)

In [13]:
# Load the employee addresses from CSV. This rebinds df_address, replacing the
# frame restored from backup.h5 at the top of the notebook.
df_address = pd.read_csv('Employee_Addresses.csv')
# report (rows, columns), then preview the first rows
print(df_address.shape)
df_address.head()

(1761, 2)


Unnamed: 0,address,employee_id
0,"B Mission St, San Francisco, CA 94112, USA",1110
1,"1 Bemis St, San Francisco, CA 94131, USA",1522
2,"1 Bernice St, San Francisco, CA 94103, USA",79
3,"1 Naylor St, San Francisco, CA 94112, USA",693
4,"1 Waterville St, San Francisco, CA 94124, USA",1349


In [14]:
# Geocode the address in row 0 as a quick check.
# Only the text of the `address` column is sent to the geocoder; df_address.iloc[0]
# on its own is the whole row (address plus employee_id), not a query string.
test_address = df_address['address'].iloc[0]
res_address = geocoder.arcgis(test_address)
print(res_address.lat, res_address.lng)

37.72025425925963 -122.4381914969639


In [15]:
# Find the coordinates of every employee address.
# A failed lookup still appends its None values so that both lists stay aligned
# with the rows of df_address; the failure is reported on stdout.
lats, lngs = [], []
for address in df_address['address']:
    result = geocoder.arcgis(address)
    lats.append(result.lat)
    lngs.append(result.lng)
    if result.lat is None:
        print("Failed Query")

In [16]:
# Attach the geocoded coordinates to the address table
df_address['lat'] = lats
df_address['lng'] = lngs

# Blank out every cell whose text form is "None", then drop all rows that
# contain a missing value
reads_as_none = df_address.astype(str).eq("None")
df_address = df_address.mask(reads_as_none).dropna()

print(df_address.shape)
df_address.head()

(1761, 4)


Unnamed: 0,address,employee_id,lat,lng
0,"B Mission St, San Francisco, CA 94112, USA",1110,37.720254,-122.438191
1,"1 Bemis St, San Francisco, CA 94131, USA",1522,37.738164,-122.428868
2,"1 Bernice St, San Francisco, CA 94103, USA",79,37.770162,-122.413815
3,"1 Naylor St, San Francisco, CA 94112, USA",693,37.711286,-122.434084
4,"1 Waterville St, San Francisco, CA 94124, USA",1349,37.736127,-122.402769


In [17]:
# initialize a FeatureGroup for the addresses
addresses = folium.map.FeatureGroup()

# One circle marker per employee address.
# The loop variables are deliberately not named `lat`: that name holds the
# map-centre latitude defined near the top of the notebook.
for address_lat, address_lng in zip(df_address['lat'], df_address['lng']):
    marker = folium.features.CircleMarker(
        [address_lat, address_lng],
        radius=5,  # how big the circle markers are drawn
        color='red',
        fill=True,
        fill_color='green',
        fill_opacity=0.6,
    )
    addresses.add_child(marker)

# Display the FeatureGroup in the map
sanfran_map.add_child(addresses)

In [18]:
# Feature matrix for clustering: just the coordinates, selected by column name so
# the result does not depend on where those columns sit inside df_address.
X = df_address[['lat', 'lng']]
X.head()

Unnamed: 0,lat,lng
0,37.720254,-122.438191
1,37.738164,-122.428868
2,37.770162,-122.413815
3,37.711286,-122.434084
4,37.736127,-122.402769


In [19]:
from sklearn.cluster import KMeans

# Cluster the employee coordinates into 10 groups.
# random_state pins the centroid initialisation so a re-run reproduces the same
# centres; n_init is spelled out because its default differs between
# scikit-learn releases.
estimator = KMeans(n_clusters=10, n_init=10, random_state=0)
estimator.fit(X)

# Transposed centres: row 0 holds the latitudes, row 1 the longitudes
best_spots = estimator.cluster_centers_.T
best_spots

array([[  37.72659746,   37.79254873,   37.76755937,   37.7120544 ,
          37.77029024,   37.73501767,   37.71006526,   37.73621946,
          37.74290197,   37.78492917],
       [-122.42674663, -122.44349413, -122.4307574 , -122.44184817,
        -122.41397985, -122.40266713, -122.41233438, -122.4354654 ,
        -122.42227173, -122.39589632]])

In [20]:
# initialize a FeatureGroup for the potential stops
potential_stops = folium.map.FeatureGroup()

# Add one marker per cluster centre to potential_stops (the group that is put on
# the map below). The centres are read straight from the fitted estimator, one
# [lat, lng] row per cluster, because best_spots is rebound later in the notebook
# with a different orientation.
for stop_lat, stop_lng in estimator.cluster_centers_:
    marker = folium.features.CircleMarker(
        [stop_lat, stop_lng],
        radius=5,  # how big the circle markers are drawn
        color='black',
        fill=True,
        fill_color='orange',
        fill_opacity=0.6,
    )
    potential_stops.add_child(marker)

# Display the FeatureGroup in the map
sanfran_map.add_child(potential_stops)

In [21]:
# addresses ordered by ascending latitude (returns a sorted copy; df_address is unchanged)
df_address.sort_values('lat')

Unnamed: 0,address,employee_id,lat,lng
1663,"889 Schwerin St, Daly City, CA 94014, USA",1815,37.702875,-122.414704
1658,"870 Schwerin St, Daly City, CA 94014, USA",1215,37.703091,-122.414710
1660,"875 Schwerin St, Daly City, CA 94014, USA",960,37.703256,-122.414535
1648,"852 Schwerin St, Daly City, CA 94014, USA",310,37.703352,-122.414592
817,"281 Oriente St, Daly City, CA 94014, USA",578,37.703399,-122.415461
1644,"850 Schwerin St, Daly City, CA 94014, USA",563,37.703406,-122.414571
1656,"869 Schwerin St, Daly City, CA 94014, USA",1298,37.703420,-122.414462
1538,"724 Templeton Ave, Daly City, CA 94014, USA",1957,37.703629,-122.450600
771,"264 Oriente St, Daly City, CA 94014, USA",477,37.703686,-122.415413
1633,"838 Schwerin St, Daly City, CA 94014, USA",1168,37.703750,-122.414421


In [26]:
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in meters between two points.

    Applies the haversine formula on a sphere of radius 6,371,000 m.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float
        Distance along the surface of the sphere, in meters. NaN inputs
        propagate to a NaN result.
    """
    import math

    R = 6371000  # radius of Earth in meters
    phi_1 = math.radians(lat1)
    phi_2 = math.radians(lat2)

    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = (math.sin(delta_phi / 2.0) ** 2
         + math.cos(phi_1) * math.cos(phi_2) * math.sin(delta_lambda / 2.0) ** 2)

    # Rounding can push `a` a hair above 1 for antipodal points, which would make
    # sqrt(1 - a) raise a domain error. A comparison (not min/max) is used so that
    # a NaN value is left alone and still propagates.
    if a > 1.0:
        a = 1.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # output distance in meters
    return R * c
    

# Snap every cluster centre to the closest candidate intersection.
# NOTE: this rebinds best_spots to the untransposed centres (one [lat, lng] row
# per cluster); the clustering cell stored the transposed array under this name.
best_spots = estimator.cluster_centers_
print(best_spots)

# Candidate intersections that have coordinates. Pairing the two columns directly
# avoids label-based row lookups, so it works whatever index `streets` carries.
located_streets = streets.dropna(subset=['lat', 'lng'])
nominees = list(zip(located_streets['lat'], located_streets['lng']))
if not nominees:
    raise ValueError("no geocoded intersections available to choose bus stops from")

best_potential = []
for center_lat, center_lng in best_spots:
    # min() returns the first nominee among equally distant ones
    nearest_lat, nearest_lng = min(
        nominees,
        key=lambda nominee: haversine(center_lat, center_lng, nominee[0], nominee[1]),
    )
    best_potential.append([nearest_lat, nearest_lng])
best_potential

[[  37.72659746 -122.42674663]
 [  37.79254873 -122.44349413]
 [  37.76755937 -122.4307574 ]
 [  37.7120544  -122.44184817]
 [  37.77029024 -122.41397985]
 [  37.73501767 -122.40266713]
 [  37.71006526 -122.41233438]
 [  37.73621946 -122.4354654 ]
 [  37.74290197 -122.42227173]
 [  37.78492917 -122.39589632]]


[[37.72923300000001, -122.4308025],
 [37.7731395, -122.4186435],
 [37.76828400000001, -122.41997549999999],
 [37.713375, -122.4439605],
 [37.77100200000001, -122.4196335],
 [37.742382000000006, -122.42196449999999],
 [37.721565, -122.4371835],
 [37.730772, -122.42925450000001],
 [37.74282300000001, -122.421708],
 [37.78837200000001, -122.399334]]

In [31]:
final_sanfran_map = folium.Map(location=[37.77, -122.42], zoom_start=12)

# initialize a FeatureGroup for the selected bus stops
best_potential_feature = folium.map.FeatureGroup()

# Add one marker per selected stop, printing its coordinates as it is added.
# The loop variables are deliberately not named `lat` / `lng` so that they do not
# overwrite the notebook-level names.
for stop_lat, stop_lng in best_potential:
    print(stop_lat, stop_lng)
    marker = folium.features.CircleMarker(
        [stop_lat, stop_lng],
        radius=10,  # larger than the radius-5 intersection markers
        color='black',
        fill=True,
        fill_color='orange',
        fill_opacity=0.6,
    )
    best_potential_feature.add_child(marker)

# Put the candidate intersections and then the selected stops on the final map
final_sanfran_map.add_child(intersections)
final_sanfran_map.add_child(best_potential_feature)

37.72923300000001 -122.4308025
37.7731395 -122.4186435
37.76828400000001 -122.41997549999999
37.713375 -122.4439605
37.77100200000001 -122.4196335
37.742382000000006 -122.42196449999999
37.721565 -122.4371835
37.730772 -122.42925450000001
37.74282300000001 -122.421708
37.78837200000001 -122.399334


In [30]:
# Persist the geocoded frames so a later session can restore them.
# The with-block closes the store when done, which flushes the data to backup.h5
# and releases the file handle.
with pd.HDFStore("backup.h5") as backup:
    backup['streets'] = streets
    backup['address'] = df_address