# IBM Capstone Week 5
## Optimal Location for Coffee Shop in Toronto
### Calvin Todorovich 6/4/20

##### Presentation is important

In [1]:
#Setting up Libraries
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from bs4 import BeautifulSoup # library to parse HTML and XML documents

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

import urllib

print("Libraries imported.")

Libraries imported.


In [2]:
#Location data

wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
response = requests.get(wiki_url)
soup = BeautifulSoup(response.text, "lxml")

canada_table = soup.find("table",{"class": "wikitable sortable"})


table = canada_table

def get_table_headers(table):
    headers = []
    for th in table.find("tr").find_all("th"):
        headers.append(th.text.strip())

#table
df = pd.read_csv("can_table.csv")

#drop that extra unnamed row
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]

#If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.
df.Neighborhood.fillna(df.Borough, inplace = True)

#If a neighborhood is still unassigned, drop it
df = df.replace('Not assigned', np.nan).dropna()

df2 = pd.read_csv(r'C:\Users\Todo\Documents\Geospatial_Coordinates.csv')

TorLoc = pd.merge(left = df, right = df2)
TorLoc.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


In [46]:
#setting up demographics data
data = requests.get('https://en.wikipedia.org/wiki/Demographics_of_Toronto_neighbourhoods').text
soup = BeautifulSoup(data, 'html')


In [70]:
NeighborhoodList = []
BoroughList = []
PopulationList = []
DensityList = []
AvgIncomeList = []
CommutingPercList = []

In [82]:
#Someone keeps editing the wikipedia table and changing its format...
table = soup.findAll('table')[5]
tab_body = table.find('tbody')
rows = tab_body.findAll('tr')

for row in rows:
    cells = row.findAll('td')
    if len(cells) == 13:
        NeighborhoodList.append(cells[0].find(text = True))
        BoroughList.append(cells[1].find(text = True))
        PopulationList.append(cells[3].find(text = True))
        DensityList.append(cells[5].find(text = True))
        AvgIncomeList.append(cells[7].find(text = True))
        CommutingPercList.append(cells[8].find(text = True))

In [75]:
#Someone keeps editing the wikipedia table and changing its format...

# find the table
#soup.find('table').find_all('tr')

# find all the rows of the table
#soup.find('table').find_all('tr')


#tab = soup.find("table",{"class":"wikitable sortable"})
table = soup.findAll('table')[5]
tab_body = table.find('tbody')
rows = tab_body.findAll('tr')


# for each row of the table, find all the table data

for row in rows:
    cells = row.findAll('td')
    if len(cells) == 13:
        NeighborhoodList.append(cells[0].find(text = True))
        BoroughList.append(cells[1].find(text = True))
        PopulationList.append(cells[3].find(text = True))
        DensityList.append(cells[5].find(text = True))
        AvgIncomeList.append(cells[7].find(text = True))
        CommutingPercList.append(cells[8].find(text = True))


In [86]:
demographics_df = pd.DataFrame({"Neighborhood": NeighborhoodList,
    "Borough": BoroughList,
    "Population": PopulationList,
    "Density": DensityList,
    "AvgIncome": AvgIncomeList,
    "Commuting%": CommutingPercList})
demographics_df.head()

Unnamed: 0,Neighborhood,Borough,Population,Density,AvgIncome,Commuting%
0,Crescent Town,EY\n,"8,157\n","20,393\n","23,021\n",24.5\n
1,Governor's Bridge,EY\n,"2,112\n",1129\n,"129,904\n",7.1\n
2,Leaside,EY\n,"13,876\n",4938\n,"82,670\n",9.7\n
3,O'Connor–Parkview,EY\n,"17,740\n",3591\n,"33,517\n",15.8\n
4,Old East York,EY\n,"52,220\n",6577\n,"33,172\n",22.0\n


In [87]:
#Remove the messy stuff from the table
demographics_df = demographics_df.replace(',','', regex=True) #remove the commas
demographics_df = demographics_df.replace('\n','', regex=True) #remove endline characters

demographics_df.head() #Will use this data frame later for profit predicting

#The neighborhood names are so vastly different, I must use borough instead

Unnamed: 0,Neighborhood,Borough,Population,Density,AvgIncome,Commuting%
0,Crescent Town,EY,8157,20393,23021,24.5
1,Governor's Bridge,EY,2112,1129,129904,7.1
2,Leaside,EY,13876,4938,82670,9.7
3,O'Connor–Parkview,EY,17740,3591,33517,15.8
4,Old East York,EY,52220,6577,33172,22.0


In [88]:
#Set up Lat and Long
address = 'Toronto'
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(TorLoc['Latitude'], TorLoc['Longitude'], TorLoc['Borough'], TorLoc['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    
map_toronto
#all the Neighborhoods in Canada:

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [89]:
#Foursquare data

# define Foursquare Credentials and Version
CLIENT_ID = 'ZKXNTW0JK4NFBVDHSQMDD1KSQGZMSMG5WLZQSZQUPX0O04TT'
CLIENT_SECRET = '1U5YA0JRWGIRWM4P1AGQVEQWTDFKSWIULJT1VO2YIAFJILER'
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ZKXNTW0JK4NFBVDHSQMDD1KSQGZMSMG5WLZQSZQUPX0O04TT
CLIENT_SECRET:1U5YA0JRWGIRWM4P1AGQVEQWTDFKSWIULJT1VO2YIAFJILER


In [90]:
#Get Venues for Toronto
#may need to run it two or three times, foursquare gives me a lot of trouble
radius = 500
LIMIT = 100

venues = []

for lat, long, post, borough, neighborhood in zip(TorLoc['Latitude'], TorLoc['Longitude'], TorLoc['Postal Code'], TorLoc['Borough'], TorLoc['Neighborhood']):
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    for venue in results:
        venues.append((
            post, 
            borough,
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [91]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['PostalCode', 'Borough', 'Neighborhood', 'BoroughLatitude', 'BoroughLongitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(2146, 9)


Unnamed: 0,PostalCode,Borough,Neighborhood,BoroughLatitude,BoroughLongitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,M3A,North York,Parkwoods,43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,M3A,North York,Parkwoods,43.753259,-79.329656,Brookbanks Pool,43.751389,-79.332184,Pool
2,M3A,North York,Parkwoods,43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,M4A,North York,Victoria Village,43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,M4A,North York,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant


In [92]:
#The user input is the Venue Category, I used Coffee Shop because it was extremely common
user_in = "Coffee Shop"

#One Hot Encoding, with user input
toronto_cluster = pd.get_dummies(venues_df[['VenueCategory']] == user_in, prefix="", prefix_sep="")

# add Lat and Long column back to dataframe
toronto_cluster['VenueLatitude'] = venues_df['VenueLatitude']
toronto_cluster['VenueLongitude'] = venues_df['VenueLongitude']
toronto_cluster.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude
0,False,43.751976,-79.33214
1,False,43.751389,-79.332184
2,False,43.751974,-79.333114
3,False,43.723481,-79.315635
4,False,43.725819,-79.312785


In [93]:
#K-Clustering
num_clusters = 5

k_means = KMeans(init="k-means++", n_clusters=num_clusters, n_init=12)
k_means.fit(toronto_cluster)
labels = k_means.labels_

print(labels[1:5])
labels

[3 3 3 3]


array([3, 3, 3, ..., 0, 0, 0])

In [94]:
toronto_cluster["Labels"] = labels
toronto_cluster['VenueName'] = venues_df['VenueName']
toronto_cluster.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName
0,False,43.751976,-79.33214,3,Brookbanks Park
1,False,43.751389,-79.332184,3,Brookbanks Pool
2,False,43.751974,-79.333114,3,Variety Store
3,False,43.723481,-79.315635,3,Victoria Village Arena
4,False,43.725819,-79.312785,3,Portugril


In [95]:
toronto_cluster['marker_color'] = pd.cut(toronto_cluster['Labels'], bins=5, 
                              labels=['yellow', 'green', 'blue', 'red', 'purple'])
toronto_cluster.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName,marker_color
0,False,43.751976,-79.33214,3,Brookbanks Park,red
1,False,43.751389,-79.332184,3,Brookbanks Pool,red
2,False,43.751974,-79.333114,3,Variety Store,red
3,False,43.723481,-79.315635,3,Victoria Village Arena,red
4,False,43.725819,-79.312785,3,Portugril,red


In [96]:
locations = toronto_cluster[['VenueLatitude', 'VenueLongitude']]
locationlist = locations.values.tolist()
#The number of venues in Toronto
len(locationlist)

2146

In [97]:
map_cluster = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, m, category, name in zip(toronto_cluster['VenueLatitude'], toronto_cluster['VenueLongitude'], toronto_cluster['marker_color'], toronto_cluster['VenueCategory'], toronto_cluster['VenueName']):
    label = '{}, {}, {}, {}'.format(category, name, lat, lng)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=m,
        fill=True,
        fill_color=m,
        fill_opacity=0.7,
        parse_html=False).add_to(map_cluster)  
map_cluster

Use a distance matrix to find the most isolated points. This will show us the venues with the least amount of competition.

In [98]:
from sklearn.neighbors import NearestNeighbors
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

In [99]:
coffee = toronto_cluster.loc[toronto_cluster['VenueCategory'] == True]
coffee.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName,marker_color
5,True,43.725517,-79.313103,1,Tim Hortons,green
8,True,43.653559,-79.361809,1,Tandem Coffee,green
19,True,43.649963,-79.361442,1,Arvo,green
20,True,43.6519,-79.365609,1,Rooster Coffee,green
23,True,43.658135,-79.359515,1,Sumach Espresso,green


In [100]:
dists = pd.get_dummies(coffee[['VenueLatitude']], prefix="", prefix_sep="")
dists['VenueLongitude'] = coffee['VenueLongitude']
dists.head()

Unnamed: 0,VenueLatitude,VenueLongitude
5,43.725517,-79.313103
8,43.653559,-79.361809
19,43.649963,-79.361442
20,43.6519,-79.365609
23,43.658135,-79.359515


In [101]:
dists2 = pd.get_dummies(toronto_cluster[['VenueLatitude']], prefix="", prefix_sep="")
dists2['VenueLongitude'] = toronto_cluster['VenueLongitude']
dists2.head()
#X2 = dists2.values
X2 = dists2.to_numpy()
X = dists.to_numpy()

#Now we have lat and long for all the venues
#Go one at a time to find nearest coffee distance

t_loc = X2[0,:]
print("Find this value: ", t_loc)

X = np.concatenate(([t_loc], X))
X[0:5]
#It worked

Find this value:  [ 43.75197605 -79.33214045]


array([[ 43.75197605, -79.33214045],
       [ 43.72551663, -79.31310251],
       [ 43.65355871, -79.36180946],
       [ 43.6499628 , -79.36144178],
       [ 43.65189966, -79.36560912]])

In [102]:
for i in range(0,len(toronto_cluster)):
    X2 = dists2.to_numpy() #resets X and X2 every iteration so the point put in last iteration is gone
    X = dists.to_numpy()
    t_loc = X2[i,:]
    X = np.concatenate(([t_loc], X))
    
    nbrs = NearestNeighbors(n_neighbors = 2, algorithm = 'ball_tree').fit(X)
    distances, indices = nbrs.kneighbors(X)

    scaler = StandardScaler()
    scaler.fit(distances)

    #The first column is all zeroes, since it represents the distance between a point and itself
    #The second column represents the distance between a point and the nearest point

    Y = abs(scaler.transform(distances)[:,1]).tolist() #absolute distance
    toronto_cluster.at[i, 'Coffee Distance'] = Y[0] # = the distance from that point the the nearest coffee shop


toronto_cluster.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName,marker_color,Coffee Distance
0,False,43.751976,-79.33214,3,Brookbanks Park,red,2.256931
1,False,43.751389,-79.332184,3,Brookbanks Pool,red,2.199774
2,False,43.751974,-79.333114,3,Variety Store,red,2.230391
3,False,43.723481,-79.315635,3,Victoria Village Arena,red,0.008802
4,False,43.725819,-79.312785,3,Portugril,red,0.302962


In [103]:
#Need to combine this with cluster df, joining on the Neighborhood
#First off, remove the total thing

#demographics_df = demographics_df.drop(demographics_df.index[0])


#demographics_df = demographics_df.drop(columns = ['Borough'])
#demographics_df.head()

toronto_cluster['Neighborhood'] = venues_df['Neighborhood']
toronto_cluster['Borough'] = venues_df['Borough']
toronto_cluster.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName,marker_color,Coffee Distance,Neighborhood,Borough
0,False,43.751976,-79.33214,3,Brookbanks Park,red,2.256931,Parkwoods,North York
1,False,43.751389,-79.332184,3,Brookbanks Pool,red,2.199774,Parkwoods,North York
2,False,43.751974,-79.333114,3,Variety Store,red,2.230391,Parkwoods,North York
3,False,43.723481,-79.315635,3,Victoria Village Arena,red,0.008802,Victoria Village,North York
4,False,43.725819,-79.312785,3,Portugril,red,0.302962,Victoria Village,North York


In [104]:
toronto_cluster = toronto_cluster.replace('North York','NY', regex=True)
toronto_cluster = toronto_cluster.replace('Scarborough','S', regex=True)
toronto_cluster = toronto_cluster.replace('Downtown Toronto','OCoT', regex=True)
toronto_cluster = toronto_cluster.replace('Etobicoke','E', regex=True)
toronto_cluster = toronto_cluster.replace('East York','EY', regex=True)

In [105]:
toronto_cluster = toronto_cluster.replace('York', 'Y', regex = True)
toronto_cluster.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName,marker_color,Coffee Distance,Neighborhood,Borough
0,False,43.751976,-79.33214,3,Brookbanks Park,red,2.256931,Parkwoods,NY
1,False,43.751389,-79.332184,3,Brookbanks Pool,red,2.199774,Parkwoods,NY
2,False,43.751974,-79.333114,3,Variety Store,red,2.230391,Parkwoods,NY
3,False,43.723481,-79.315635,3,Victoria Village Arena,red,0.008802,Victoria Village,NY
4,False,43.725819,-79.312785,3,Portugril,red,0.302962,Victoria Village,NY


In [106]:
demographics_df.head()

Unnamed: 0,Neighborhood,Borough,Population,Density,AvgIncome,Commuting%
0,Crescent Town,EY,8157,20393,23021,24.5
1,Governor's Bridge,EY,2112,1129,129904,7.1
2,Leaside,EY,13876,4938,82670,9.7
3,O'Connor–Parkview,EY,17740,3591,33517,15.8
4,Old East York,EY,52220,6577,33172,22.0


In [107]:
merged = pd.merge(toronto_cluster,demographics_df)
merged.head()
#Need to get the merge to keep all data in toronto cluster, so left join

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName,marker_color,Coffee Distance,Neighborhood,Borough,Population,Density,AvgIncome,Commuting%
0,False,43.751976,-79.33214,3,Brookbanks Park,red,2.256931,Parkwoods,NY,26533,5349,34811,14.0
1,False,43.751976,-79.33214,3,Brookbanks Park,red,2.256931,Parkwoods,NY,26533,5349,34811,14.0
2,False,43.751389,-79.332184,3,Brookbanks Pool,red,2.199774,Parkwoods,NY,26533,5349,34811,14.0
3,False,43.751389,-79.332184,3,Brookbanks Pool,red,2.199774,Parkwoods,NY,26533,5349,34811,14.0
4,False,43.751974,-79.333114,3,Variety Store,red,2.230391,Parkwoods,NY,26533,5349,34811,14.0


In [108]:
Profit_Calc = pd.get_dummies(merged[['Coffee Distance']], prefix="", prefix_sep="")
Profit_Calc['Population'] = merged['Population']
Profit_Calc['Density'] = merged['Density']
Profit_Calc['Avg Income'] = merged['AvgIncome']
Profit_Calc['Commuting%'] = merged['Commuting%']

#Profit_Calc.head()

x = Profit_Calc.values
min_max_scaler = preprocessing.MinMaxScaler() 
x_scaled = min_max_scaler.fit_transform(x)

Profit_Calc = pd.DataFrame(x_scaled)

Profit_Calc.head()


Unnamed: 0,0,1,2,3,4
0,0.357619,0.484783,0.060035,0.065084,0.333333
1,0.357619,0.484783,0.060035,0.065084,0.333333
2,0.348557,0.484783,0.060035,0.065084,0.333333
3,0.348557,0.484783,0.060035,0.065084,0.333333
4,0.353411,0.484783,0.060035,0.065084,0.333333


In [109]:
#Linear Combo of the Variables
#Y = scaler.transform(distances)[:,1].tolist() #may need to do seperate list for each variable, I'm thinking yeah

merged['Profit Score'] = (Profit_Calc.loc[:,0] + Profit_Calc.loc[:,1] + Profit_Calc.loc[:,2] + Profit_Calc.loc[:,3] + Profit_Calc.loc[:,4])/5  #Take the average of these 5 for now
merged.head()

Unnamed: 0,VenueCategory,VenueLatitude,VenueLongitude,Labels,VenueName,marker_color,Coffee Distance,Neighborhood,Borough,Population,Density,AvgIncome,Commuting%,Profit Score
0,False,43.751976,-79.33214,3,Brookbanks Park,red,2.256931,Parkwoods,NY,26533,5349,34811,14.0,0.260171
1,False,43.751976,-79.33214,3,Brookbanks Park,red,2.256931,Parkwoods,NY,26533,5349,34811,14.0,0.260171
2,False,43.751389,-79.332184,3,Brookbanks Pool,red,2.199774,Parkwoods,NY,26533,5349,34811,14.0,0.258358
3,False,43.751389,-79.332184,3,Brookbanks Pool,red,2.199774,Parkwoods,NY,26533,5349,34811,14.0,0.258358
4,False,43.751974,-79.333114,3,Variety Store,red,2.230391,Parkwoods,NY,26533,5349,34811,14.0,0.259329


In [110]:
merge_group = merged.groupby('Neighborhood').mean() #The top 15 neighborhoods based on the average profit score metric
merge_group.sort_values('Profit Score', ascending = False).head(15)

Unnamed: 0_level_0,VenueCategory,VenueLatitude,VenueLongitude,Labels,Coffee Distance,Profit Score
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
St. James Town,0.058824,43.650098,-79.376167,1.941176,0.245395,0.449044
Agincourt,0.0,43.792049,-79.259427,3.0,3.842672,0.351064
Downsview,0.0,43.742198,-79.501877,0.75,2.855004,0.312063
Church and Wellesley,0.105263,43.666195,-79.382816,1.894737,0.20425,0.308067
Woburn,0.5,43.771113,-79.220309,2.0,0.299168,0.279654
Rosedale,0.0,43.679754,-79.377335,2.0,0.786679,0.277084
Parkwoods,0.0,43.75178,-79.33248,3.0,2.229032,0.259286
Bayview Village,0.0,43.787903,-79.38086,4.0,3.222711,0.232736
Humber Summit,0.0,43.757837,-79.567048,0.0,6.167868,0.231885
Weston,0.0,43.705312,-79.515829,0.0,1.921335,0.213516


In [111]:
#Map the top 15
top_15_neigh = merge_group.sort_values('Profit Score', ascending = False).head(15)
#top_15_neigh.head()

In [112]:
top_15_neigh = top_15_neigh.reset_index()

In [113]:
map_best_neigh = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for  neigh, lat, lng, category, score, cd in zip(top_15_neigh['Neighborhood'], top_15_neigh['VenueLatitude'], top_15_neigh['VenueLongitude'], top_15_neigh['VenueCategory'], top_15_neigh['Profit Score'], top_15_neigh['Coffee Distance']):
    label = '{}, {}, {}'.format(neigh, category, score)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='Blue',
        fill=True,
        parse_html=False).add_to(map_best_neigh)  
map_best_neigh

In [143]:
#Make it top 5 and add numbers to map marker#Make it top 5 and add numbers to map marker

top_5_neigh = merge_group.sort_values('Profit Score', ascending = False).head(5)
top_5_neigh = top_5_neigh.reset_index()

In [147]:
top_5_neigh

Unnamed: 0,Neighborhood,VenueCategory,VenueLatitude,VenueLongitude,Labels,Coffee Distance,Profit Score
0,St. James Town,0.058824,43.650098,-79.376167,1.941176,0.245395,0.449044
1,Agincourt,0.0,43.792049,-79.259427,3.0,3.842672,0.351064
2,Downsview,0.0,43.742198,-79.501877,0.75,2.855004,0.312063
3,Church and Wellesley,0.105263,43.666195,-79.382816,1.894737,0.20425,0.308067
4,Woburn,0.5,43.771113,-79.220309,2.0,0.299168,0.279654


In [151]:
#Inefficient but I want to make it pretty

from folium.features import DivIcon

m = folium.Map(
    location=[latitude, longitude],
    zoom_start=12,
    #tiles='Mapbox Bright'
)

p1 = [top_5_neigh.iloc[0]['VenueLatitude'], top_5_neigh.iloc[0]['VenueLongitude']]
folium.Marker(p1, icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(7,20),
        html='<div style="font-size: 18pt; color : black">1</div>',
        )).add_to(m)
m.add_child(folium.CircleMarker(p1, radius=15, color='Green'))

p2 = [top_5_neigh.iloc[1]['VenueLatitude'], top_5_neigh.iloc[1]['VenueLongitude']]
folium.Marker(p2, icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(7,20),
        html='<div style="font-size: 18pt; color : black">2</div>',
        )).add_to(m)
m.add_child(folium.CircleMarker(p2, radius=15, color='Green'))

p3 = [top_5_neigh.iloc[2]['VenueLatitude'], top_5_neigh.iloc[2]['VenueLongitude']]
folium.Marker(p3, icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(7,20),
        html='<div style="font-size: 18pt; color : black">3</div>',
        )).add_to(m)
m.add_child(folium.CircleMarker(p3, radius=15, color = 'Green'))

p4 = [top_5_neigh.iloc[3]['VenueLatitude'], top_5_neigh.iloc[3]['VenueLongitude']]
folium.Marker(p4, icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(7,20),
        html='<div style="font-size: 18pt; color : black">4</div>',
        )).add_to(m)
m.add_child(folium.CircleMarker(p4, radius=15, color = 'Green'))

p5 = [top_5_neigh.iloc[4]['VenueLatitude'], top_5_neigh.iloc[4]['VenueLongitude']]
folium.Marker(p5, icon=DivIcon(
        icon_size=(150,36),
        icon_anchor=(7,20),
        html='<div style="font-size: 18pt; color : black">5</div>',
        )).add_to(m)
m.add_child(folium.CircleMarker(p5, radius=15, color = 'Green'))