#### Step 1 :
Importing the required libraries.

In [None]:
import os
import time

import folium
import matplotlib.cm as cm
import matplotlib.colors as colors
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from geopy.exc import GeopyError
from geopy.geocoders import Nominatim
from IPython.display import display
from sklearn.cluster import KMeans

# Remove pandas' row/column display limits and widen the text output so the
# DataFrames previewed below are rendered without truncation or wrapping.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)

#### Step 2 :
Scraping Toronto postal code data from the Wikipedia page.

In [None]:
# Download the Wikipedia page listing the "M" (Toronto) postal codes. The
# timeout and explicit status check make network problems fail loudly instead
# of silently parsing an error page.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

table = soup.find('table', class_='wikitable')
if table is None:
    raise ValueError("No table with class 'wikitable' was found on the page.")

# find_all('tr') returns only real table rows, so we do not depend on the
# whitespace text nodes that sit between rows in the raw markup.
table_rows = table.find_all('tr')
if not table_rows:
    raise ValueError('The postal-code table contains no rows.')

# Column names come from the header row; the first three are expected to be
# postcode, borough and neighbourhood, in that order.
columns = [heading.text.strip() for heading in table_rows[0].find_all('th')]
if len(columns) < 3:
    raise ValueError('Expected at least three header cells, found {}.'.format(len(columns)))
postcode_col, borough_col, neighbourhood_col = columns[:3]

data = {postcode_col: [], borough_col: [], neighbourhood_col: []}
row_of_postcode = {}  # postcode -> position in the column lists, for merging duplicates

for row in table_rows[1:]:
    # Strip every cell: the last cell of each row carries a trailing newline,
    # which previously defeated the 'Not assigned' comparison.
    cells = [cell.text.strip() for cell in row.find_all('td')]
    if len(cells) < 3:
        continue
    postcode, borough, neighbourhood = cells[:3]

    # Postcodes without a borough are not in use; skip them.
    if borough == 'Not assigned':
        continue
    # A borough without a named neighbourhood is its own neighbourhood.
    if neighbourhood == 'Not assigned':
        neighbourhood = borough

    if postcode in row_of_postcode:
        # Same postcode seen again: merge neighbourhoods into one comma-separated cell.
        data[neighbourhood_col][row_of_postcode[postcode]] += ', ' + neighbourhood
    else:
        row_of_postcode[postcode] = len(data[postcode_col])
        data[postcode_col].append(postcode)
        data[borough_col].append(borough)
        data[neighbourhood_col].append(neighbourhood)

data = pd.DataFrame(data)
display(data.head(10))

#### Step 3 :
Getting the geographical coordinates of each postal code and adding them to the dataframe.

In [None]:
# Load the latitude/longitude lookup and align its key column name with the
# scraped table so the two frames can be joined on 'Postcode'.
cords = pd.read_csv('http://cocl.us/Geospatial_data').rename(columns={'Postal Code': 'Postcode'})

# Left join: keep every scraped postcode, attaching coordinates where present.
df = data.merge(cords, how='left', on='Postcode')
display(df.head(10))

#### Step 4 :
Showing all places on map.

In [None]:
address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="ny_explorer")
location = None

# Retry a bounded number of times. Only geopy's own errors are treated as
# retryable, so Ctrl-C and programming errors are no longer swallowed, and a
# persistent failure cannot spin forever.
max_geocode_attempts = 5
for attempt in range(1, max_geocode_attempts + 1):
    try:
        location = geolocator.geocode(address)
    except GeopyError as error:
        print('Geocoding attempt {} of {} failed: {}'.format(attempt, max_geocode_attempts, error))
    if location is not None:
        break
    if attempt < max_geocode_attempts:
        time.sleep(attempt)  # simple linear back-off between attempts

# geocode() returns None when the address is not found; surface that clearly.
if location is None:
    raise RuntimeError('Could not geocode {!r} after {} attempts.'.format(address, max_geocode_attempts))

latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of {} are {}, {}.'.format(address, latitude, longitude))

Pointing the places on map using folium.

In [None]:
# Base map centred on the geocoded coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# One blue circle per row of `df`; the popup reads "<neighbourhood>, <borough>".
marker_columns = (df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood'])
for point_lat, point_lng, borough_name, neighbourhood_name in zip(*marker_columns):
    popup = folium.Popup('{}, {}'.format(neighbourhood_name, borough_name), parse_html=True)
    marker = folium.CircleMarker(
        location=[point_lat, point_lng],
        radius=4,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

#### Step 5 :
Listing out only places whose borough contains the word 'Toronto' and plotting the same on the map.

In [None]:
# Boolean mask of rows whose borough name contains the literal text 'Toronto'.
# regex=False makes this a plain substring test (same as the `in` operator);
# na=False treats a missing borough as "no match" instead of raising.
is_toronto_borough = df['Borough'].str.contains('Toronto', regex=False, na=False)

toronto_data = df[is_toronto_borough].reset_index(drop=True)
display(toronto_data.head(10))

Plotting the subset of places on the map.

In [None]:
# Fresh, slightly more zoomed-in map for the Toronto-only subset.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=12)

# Draw one circle marker per row of `toronto_data`, labelled "<neighbourhood>, <borough>".
subset_columns = (
    toronto_data['Latitude'],
    toronto_data['Longitude'],
    toronto_data['Borough'],
    toronto_data['Neighbourhood'],
)
for point_lat, point_lng, borough_name, neighbourhood_name in zip(*subset_columns):
    popup = folium.Popup('{}, {}'.format(neighbourhood_name, borough_name), parse_html=True)
    marker = folium.CircleMarker(
        location=[point_lat, point_lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

map_toronto

#### Step 6 :
Define the Foursquare API credentials and get the nearby venue information.

In [None]:
# Foursquare credentials are read from the environment so that secrets are
# never stored in the notebook source or echoed into its saved output.
# NOTE(review): any key pair that was previously committed in this notebook
# should be treated as leaked and rotated.
clientID = os.environ.get('FOURSQUARE_CLIENT_ID')
clientSecret = os.environ.get('FOURSQUARE_CLIENT_SECRET')
version = '20180605'  # sent as the `v` query parameter on every API request

if not clientID or not clientSecret:
    raise RuntimeError(
        'Set the FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET '
        'environment variables before running this cell.'
    )

print('Foursquare credentials loaded from the environment.')

Function to extract neighbourhood information using the API.

In [None]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=100):
    """Query the Foursquare "explore" endpoint around each location.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of each location to search around.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    limit : int, default 100
        Value sent as the ``limit`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighbourhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but still has these columns) when no venue is
        found. 'Venue Category' is None for a venue without any category.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.

    Notes
    -----
    Reads the module-level ``clientID``, ``clientSecret`` and ``version``.
    """
    columns = ['Neighbourhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    records = []

    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string instead of
        # concatenating it by hand.
        params = {
            'client_id': clientID,
            'client_secret': clientSecret,
            'v': version,
            'll': '{},{}'.format(lat, lng),
            'radius': radius,
            'limit': limit,
        }
        response = requests.get('https://api.foursquare.com/v2/venues/explore', params=params, timeout=30)
        response.raise_for_status()
        results = response.json()["response"]['groups'][0]['items']

        for item in results:
            venue = item['venue']
            # Some venues may come back without categories; do not crash on them.
            categories = venue.get('categories') or []
            records.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing `columns` up front keeps the schema intact even with zero rows;
    # assigning column names afterwards fails on an empty frame.
    nearby_venues = pd.DataFrame(records, columns=columns)

    return nearby_venues

# Collect venues around every row of `df` (default radius and limit), then
# preview the first few results.
venues = getNearbyVenues(df['Neighbourhood'], df['Latitude'], df['Longitude'])
display(venues.head(10))

#### Step 7 :
Generating a one-hot encoded dataframe based on venue category.

In [None]:
# One indicator column per venue category; the empty prefix and separator keep
# the bare category names as column labels.
venue_encoding = pd.get_dummies(venues[['Venue Category']], prefix='', prefix_sep='')

# Put the neighbourhood label in front so each row can be traced to its area.
venue_encoding.insert(0, 'Neighbourhood', venues['Neighbourhood'])
display(venue_encoding.head(10))

Grouping rows by neighbourhood and taking the mean of the frequency of occurrence of each category.

In [None]:
# Average the indicator columns per neighbourhood, keeping 'Neighbourhood' as
# an ordinary column rather than the index.
groups = venue_encoding.groupby('Neighbourhood', as_index=False).mean()
display(groups.head(10))

#### Step 8 :
Clustering neighbourhoods using KMeans into 5 clusters.

In [None]:
clusters = 5

# Make the cell safe to re-run: drop labels left over from a previous run so
# that insert() below does not fail on a duplicate column and the old labels
# are not fed back into KMeans as a feature.
if 'Cluster Labels' in groups.columns:
    groups = groups.drop(columns='Cluster Labels')

# Column 0 is the neighbourhood name; every remaining column is a category frequency.
kmeans = KMeans(n_clusters=clusters, random_state=0).fit(groups.iloc[:, 1:])

groups.insert(loc=1, column='Cluster Labels', value=kmeans.labels_)

# Left join: Toronto neighbourhoods with no venue data keep their row but get
# a missing cluster label.
groups_clustered = pd.merge(toronto_data, groups, on='Neighbourhood', how='left')
groups_clustered.head(10)

#### Step 9 :
Plotting the neighbourhoods on the map with the corresponding cluster.

In [None]:
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=12)

# One evenly spaced rainbow colour per cluster, converted to hex strings.
colors_array = cm.rainbow(np.linspace(0, 1, clusters))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# The column is spelled 'Neighbourhood' throughout this notebook.
for lat, lon, poi, cluster in zip(groups_clustered['Latitude'], groups_clustered['Longitude'], groups_clustered['Neighbourhood'], groups_clustered['Cluster Labels']):
    # Rows without coordinates or without a cluster label cannot be drawn.
    if pd.isna(lat) or pd.isna(lon) or pd.isna(cluster):
        continue
    # Missing labels elsewhere in the column make the labels floats; list
    # indices must be integers.
    cluster = int(cluster)
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        # cluster - 1 keeps the existing colour assignment: cluster 0 wraps
        # around to the last colour via negative indexing.
        color=rainbow[cluster - 1],
        fill=True,
        fill_color=rainbow[cluster - 1],
        fill_opacity=0.7).add_to(map_clusters)

map_clusters