In [0]:
import pandas as pd 
import requests 
from bs4 import BeautifulSoup
from time import sleep
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
from random import randint

In [19]:
# Download the Wikipedia page that lists the "M" postal codes.
response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
response.raise_for_status()  # stop here instead of parsing an error page
r = response.text
soup = BeautifulSoup(r, 'lxml')

# Read the table row by row so that a row with an unexpected number of cells
# is skipped instead of shifting every later value into the wrong column.
table_rows = []
for table in soup.find_all('table', class_='wikitable sortable'):
    for row in table.find_all('tr'):
        # Newlines become spaces; strip() removes the padding that leaves behind.
        cells = [cell.text.replace('\n', ' ').strip() for cell in row.find_all('td')]
        if len(cells) != 3:
            continue  # e.g. a header row, which contains no <td> cells
        code_cell, borough_cell, neighborhood_cell = cells
        # Keep only the first space-delimited token of the postal-code cell.
        table_rows.append((code_cell.split(' ')[0], borough_cell, neighborhood_cell))

if not table_rows:
    raise ValueError("no three-cell rows found in any 'wikitable sortable' table")

toronto = pd.DataFrame(table_rows, columns=['Postal Code', 'Borough', 'Neighborhood'])
toronto

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government
7,M8A,Not assigned,
8,M9A,Etobicoke,Islington Avenue
9,M1B,Scarborough,Malvern / Rouge


In [3]:
# Load the per-postal-code coordinates and attach them to the scraped table.
# No key is given, so the merge joins on the columns the two frames share.
coordinates = pd.read_csv('/content/Geospatial_Coordinates.csv')
final_df = toronto.merge(coordinates)
final_df


Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Regent Park / Harbourfront,43.65426,-79.360636
3,M6A,North York,Lawrence Manor / Lawrence Heights,43.718518,-79.464763
4,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,Malvern / Rouge,43.806686,-79.194353
7,M3B,North York,Don Mills,43.745906,-79.352188
8,M4B,East York,Parkview Hill / Woodbine Gardens,43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


In [0]:
import sklearn
import folium

In [5]:
# Map centre used by this cell and by the clustered map further down.
latitude, longitude = 43.6532, -79.3832
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Appearance shared by every marker on this map.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle per row of final_df, with a "<neighborhood>, <borough>" popup.
marker_rows = final_df[['Latitude', 'Longitude', 'Borough', 'Neighborhood']].itertuples(index=False, name=None)
for lat, lng, borough, neighborhood in marker_rows:
    label = folium.Popup('{}, {}'.format(neighborhood, borough), parse_html=True)
    folium.CircleMarker([lat, lng], popup=label, **marker_style).add_to(map_toronto)

map_toronto

In [18]:
from sklearn.cluster import KMeans
import numpy as np

# Cluster on position only: Z has one (latitude, longitude) row per entry of final_df.
X = final_df['Latitude']
Y = final_df['Longitude']
Z = np.stack((X, Y), axis=1)

kmeans = KMeans(n_clusters=4, random_state=0).fit(Z)
clusters = kmeans.labels_
final_df['Cluster'] = clusters
# One fill colour per cluster label; the list length matches n_clusters above.
colors = ['red', 'blue', 'green', 'yellow']

# `latitude` / `longitude` are the map-centre values set in the earlier map cell.
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# The loop targets are deliberately NOT called latitude/longitude: a for-loop
# target stays bound after the loop ends, so reusing those names would replace
# the map centre with the last row's coordinates and mis-centre the map the
# next time this cell (or any later cell reading them) runs.
for lat, lng, borough, cluster in zip(final_df['Latitude'], final_df['Longitude'], final_df['Borough'], final_df['Cluster']):
    label = folium.Popup(borough, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='black',
        fill=True,
        fill_color=colors[cluster],
        fill_opacity=0.7).add_to(toronto_map)

toronto_map

In [0]:
import os

# Foursquare API settings read by getNearbyVenues.
# The credentials are taken from the FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET environment variables when those are set; otherwise
# the previously hard-coded values are used so existing runs keep working.
# NOTE(review): the fallback literals are credentials stored in the notebook
# itself -- rotate them and remove the fallbacks once the environment
# variables are in place.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'BTT0PWIXI0LS4RZMCYUYKEYQSLHFLKPEVIXF0FFOLGGNTBDZ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'W31RT5WLEGX4FGWCDMEEER3UV3F11IBZ0CEDF3TBLS5A4YY3')
VERSION = '20180605'  # sent as the `v` query parameter
LIMIT = 100  # sent as the `limit` query parameter

def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare ``venues/explore`` endpoint around each location.

    Parameters
    ----------
    names : iterable
        Label for each location; copied into the ``Neighborhood`` column.
    latitudes, longitudes : iterable
        Coordinates of each location, paired positionally with ``names``.
    radius : int, default 500
        Sent as the ``radius`` query parameter of every request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns ``Neighborhood``,
        ``Neighborhood Latitude``, ``Neighborhood Longitude``, ``Venue``,
        ``Venue Latitude``, ``Venue Longitude`` and ``Venue Category``.
        When no venue is returned (or the inputs are empty) the frame is
        empty but still carries these columns.

    Raises
    ------
    requests.HTTPError
        If the API responds with an error status code.

    Notes
    -----
    Reads the module-level ``CLIENT_ID``, ``CLIENT_SECRET``, ``VERSION`` and
    ``LIMIT`` values.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
            },
            timeout=30,
        )
        response.raise_for_status()  # surface HTTP errors instead of a KeyError below
        results = response.json()['response']['groups'][0]['items']

        for v in results:
            venue = v['venue']
            # A venue may come back without any category; record it as missing
            # rather than failing on categories[0].
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None,
            ))

    # Passing `columns` to the constructor keeps the schema even when `rows`
    # is empty; assigning `.columns` afterwards would raise a length mismatch.
    nearby_venues = pd.DataFrame(rows, columns=columns)
    return nearby_venues
    

      
  
  
# Fetch venues around every row of final_df, using the default radius.
toronto_venues = getNearbyVenues(
    names=final_df['Neighborhood'],
    latitudes=final_df['Latitude'],
    longitudes=final_df['Longitude'],
)


In [7]:
# Count the non-null 'Venue Category' entries for each neighborhood.
venues_by_neighborhood = toronto_venues.groupby('Neighborhood')
venues_by_neighborhood['Venue Category'].count()

Neighborhood
Agincourt                                                                                                                                           4
Alderwood / Long Branch                                                                                                                             9
Bathurst Manor / Wilson Heights / Downsview North                                                                                                  20
Bayview Village                                                                                                                                     4
Bedford Park / Lawrence Manor East                                                                                                                 23
Berczy Park                                                                                                                                        57
Birch Cliff / Cliffside West                                                           