In [1]:
import os

import numpy as np
import pandas as pd

In [2]:
import requests
from bs4 import BeautifulSoup

# Download the Wikipedia page that lists the "M" postal codes. The timeout keeps
# the cell from hanging indefinitely, and raise_for_status() reports an HTTP
# failure here instead of letting an error page be parsed as the article.
wiki_response = requests.get(
    'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',
    timeout=30,
)
wiki_response.raise_for_status()

# NOTE: despite its name, website_url holds the page's HTML text, not a URL.
website_url = wiki_response.text

# Parse the HTML with the lxml backend so the table can be searched afterwards.
soup = BeautifulSoup(website_url, 'lxml')

In [3]:
# Locate the postal-code table by its CSS classes.
neighborhoods_table = soup.find('table', {"class": 'wikitable sortable'})
if neighborhoods_table is None:
    # Fail with a clear message rather than an opaque error further down.
    raise ValueError("No table with class 'wikitable sortable' was found in the page")

# Every <tr> of the table (header row included); a single lookup is enough,
# there is no need to repeat it for each child node of the table.
rows = neighborhoods_table.find_all('tr')

The cells above download the Wikipedia page and extract the rows of its postal-code table.

The below cell gets the 3 headers from the table.

In [4]:
# Column titles are the <th> cells of the first table row, with trailing
# whitespace removed.
header_cells = rows[0].find_all('th')
headers = [cell.text.rstrip() for cell in header_cells]

In [5]:
# Three independent, initially empty lists - one per table column.
PostalCode, Borough, Neighborhood = [], [], []

In [6]:
# Start from empty lists every time so that re-running this cell does not
# append a second copy of every row.
PostalCode = []
Borough = []
Neighborhood = []

str1 = 'Not assigned'
for row in rows:
    cells = row.find_all('td')
    # cells[2] is read below, so a usable row needs at least three <td> cells;
    # rows without them (e.g. the <th>-only header row) are skipped.
    if len(cells) < 3:
        continue
    # Skip postal codes whose borough cell is marked "Not assigned".
    if str1 in str(cells[1]):
        continue
    PostalCode.append(cells[0].text.strip())
    Borough.append(cells[1].text.strip())
    Neighborhood.append(cells[2].text.strip())

The cells above build one list per table column, skipping rows whose borough is "Not assigned".

The below cells put the data into a dictionary, and from there into a dataframe.

In [7]:
# Map each column title to the list of values scraped for it (insertion order
# determines the column order of the dataframe built from this dict).
data = dict([
    ('Postal Code', PostalCode),
    ('Borough', Borough),
    ('Neighborhood', Neighborhood),
])

In [8]:
# Each dict key becomes a column; the lists supply the rows.
dataframe1 = pd.DataFrame(data)

In [9]:
# Preview the first ten scraped rows.
dataframe1.head(10)

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


In [10]:
# (rows, columns) of the scraped table.
dataframe1.shape

(103, 3)

The cells below load the provided coordinates CSV file, merge it with the existing dataframe on the Postal Code column, and then keep only the boroughs whose name contains "Toronto".

In [11]:
# Latitude/longitude per postal code, read straight from the hosted CSV.
geospatial_csv_url = "http://cocl.us/Geospatial_data"
coords = pd.read_csv(geospatial_csv_url)
coords.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Attach coordinates to every postal code, then keep only the rows whose
# borough name contains "Toronto".
CombinedData = (
    dataframe1
    .merge(coords, on='Postal Code')
    .loc[lambda merged: merged['Borough'].str.contains("Toronto")]
)
CombinedData.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031


The cell below installs and imports Folium for mapping.

In [13]:
# Install a pinned folium release from the conda-forge channel via a shell
# escape, then import it and print a confirmation.
# NOTE(review): `!conda` runs whichever conda is first on the shell PATH -
# confirm it targets the environment this kernel is running in.
!conda install -c conda-forge folium=0.5.0 --yes
import folium
print("Imported folium")

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.6.20          |   py36h9880bd3_2         151 KB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    openssl-1.1.1h             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2020.6.20  |       hecda079_0         145 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ------------------------------------------------------------
                       

The cells below create the map of Toronto and add a marker for each neighborhood.

In [14]:
# Base map centred on the coordinates below at zoom level 12.
toronto_center = [43.711012, -79.391726]
map_toronto = folium.Map(location=toronto_center, zoom_start=12)

In [15]:
# Add one circle marker per row of CombinedData to the Toronto map; each popup
# reads "<neighborhood>, <borough>".
marker_fields = zip(
    CombinedData['Latitude'],
    CombinedData['Longitude'],
    CombinedData['Borough'],
    CombinedData['Neighborhood'],
)
for lat, lng, borough, neighborhood in marker_fields:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        # NOTE(review): parse_html looks like a Popup option rather than a
        # CircleMarker one - confirm whether this keyword has any effect here.
        parse_html=False,
    )
    marker.add_to(map_toronto)

In [16]:
map_toronto

In [17]:
# Foursquare credentials are read from the environment so that secrets are
# never stored in (or committed with) the notebook. Export
# FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE: the key pair that used to be hard-coded here has been exposed and
# should be revoked and regenerated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('WARNING: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will fail.')

VERSION = '20201018'  # sent as the `v` query parameter of every request
LIMIT = 100           # sent as the `limit` query parameter of every request

In [85]:
def FindNearbyVenues(names, latitudes, longitudes, radius=500, search_query='coffee'):
    """Query Foursquare's venues/explore endpoint around each given location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables
        Location label and coordinates, consumed in lockstep with ``zip``.
        Each name is printed as its request is issued.
    radius : number, default 500
        Forwarded as the ``radius`` query parameter.
    search_query : str, default 'coffee'
        Forwarded as the ``query`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Lat', 'Neighborhood Long', 'Venue', ' Venue Lat',
        'Venue Long' and 'Venue Cat'. The frame is empty (but still has these
        columns) when no venue is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    # NOTE: ' Venue Lat' keeps its historical leading space so that existing
    # code selecting that column keeps working.
    columns = ['Neighborhood', 'Neighborhood Lat', 'Neighborhood Long',
               'Venue', ' Venue Lat', 'Venue Long', 'Venue Cat']

    venue_rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)

        # Let requests build and URL-encode the query string instead of
        # formatting it by hand (search_query may contain spaces or '&').
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': LIMIT,
                'query': search_query,
            },
            timeout=30,
        )
        # Surface HTTP failures here rather than as a KeyError on the JSON below.
        response.raise_for_status()
        results = response.json()['response']['groups'][0]['items']

        for ven in results:
            venue = ven['venue']
            categories = venue['categories']
            venue_rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                # A venue with an empty category list gets None instead of
                # aborting the whole run with an IndexError.
                categories[0]['name'] if categories else None,
            ))

    # Passing the column names to the constructor also works for zero rows;
    # assigning .columns afterwards raises a length mismatch on an empty frame.
    nearbySpots = pd.DataFrame(venue_rows, columns=columns)

    return nearbySpots

In [74]:
# Look up nearby venues (default radius and search query) for every
# neighborhood kept in CombinedData.
test1 = FindNearbyVenues(
    names=CombinedData['Neighborhood'],
    latitudes=CombinedData['Latitude'],
    longitudes=CombinedData['Longitude'],
)

Regent Park, Harbourfront
Queen's Park, Ontario Provincial Government
Garden District, Ryerson
St. James Town
The Beaches
Berczy Park
Central Bay Street
Christie
Richmond, Adelaide, King
Dufferin, Dovercourt Village
Harbourfront East, Union Station, Toronto Islands
Little Portugal, Trinity
The Danforth West, Riverdale
Toronto Dominion Centre, Design Exchange
Brockton, Parkdale Village, Exhibition Place
India Bazaar, The Beaches West
Commerce Court, Victoria Hotel
Studio District
Lawrence Park
Roselawn
Davisville North
Forest Hill North & West, Forest Hill Road Park
High Park, The Junction South
North Toronto West,  Lawrence Park
The Annex, North Midtown, Yorkville
Parkdale, Roncesvalles
Davisville
University of Toronto, Harbord
Runnymede, Swansea
Moore Park, Summerhill East
Kensington Market, Chinatown, Grange Park
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport


### Now I cluster the data I have

In [91]:
from sklearn.cluster import KMeans

kclusters = 5

# Select the coordinate columns explicitly rather than dropping the others:
# once 'Cluster Labels' has been inserted into CombinedData, a re-run of this
# cell would otherwise feed the previous labels back in as a feature.
TorontoClusters = CombinedData[['Latitude', 'Longitude']]
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(TorontoClusters)

# Sanity check: the number of labels should equal the number of clustered rows.
print(len(kmeans.labels_))
print(TorontoClusters.shape[0])

39
39


In [93]:
# Remove a label column left over from an earlier run first: DataFrame.insert
# raises if the column already exists, which made this cell fail when re-run.
# drop() also returns a new frame, so the insert no longer acts on a filtered slice.
CombinedData = CombinedData.drop(columns='Cluster Labels', errors='ignore')
CombinedData.insert(0, 'Cluster Labels', kmeans.labels_)
CombinedData.head()

Unnamed: 0,Cluster Labels,Postal Code,Borough,Neighborhood,Latitude,Longitude
2,0,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
4,0,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
9,0,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,0,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,4,M4E,East Toronto,The Beaches,43.676357,-79.293031


### Now I create the map using the clustered data

In [99]:
import matplotlib.cm as cm
import matplotlib.colors as colors

MappedClusters = folium.Map(location=[43.711012, -79.391726], zoom_start=12)

# One evenly spaced colour of the rainbow colormap per cluster, as hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, kclusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# Add one marker per neighborhood, coloured by its cluster label.
for lat, lon, poi, cluster in zip(CombinedData['Latitude'], CombinedData['Longitude'], CombinedData['Neighborhood'], CombinedData['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    # The labels start at 0, so they index the palette directly; the former
    # `cluster-1` only worked for label 0 through negative-index wraparound.
    cluster_color = rainbow[cluster]
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=cluster_color,
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.7).add_to(MappedClusters)

MappedClusters