In [58]:
from IPython.display import display

# Segmenting and Clustering Neighborhoods in Toronto

This assignment is about data clustering, using Toronto neighborhoods as an example.


## Part 1 - Creating dataset
------
Before we can start with machine learning algorithms, we have to create and prepare our dataset. This is what I will do in this part. After importing the necessary libraries, the code scrapes the [Wikipedia page](https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M) containing the needed data. To do so I use the pandas function `read_html`, which works like magic.

In [1]:
import os
from io import StringIO

import pandas as pd
import requests

In [2]:
# Source page: the Wikipedia list of Canadian postal codes starting with "M".
link = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# Fail fast on a hung connection or an HTTP error status instead of handing
# an error page to the HTML parser.
r = requests.get(link, timeout=30)
r.raise_for_status()

# read_html expects a path/URL or a file-like object; passing a raw HTML
# string is deprecated in recent pandas releases, so wrap it in StringIO.
tables = pd.read_html(StringIO(r.text))
raw_df = tables[0]  # the first table on the page is the postal-code table
raw_df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


It is necessary to handle `null` values according to the assignment. Even though the assignment says that there could be some postal codes with a borough assigned but no neighbourhood assigned, I have noticed that this doesn't occur in this case. Therefore the part of the code responsible for handling this situation won't have any impact on the dataset.

In [3]:
# Build the cleaned frame in one chain so the cell is idempotent and never
# assigns through chained indexing (which triggers SettingWithCopyWarning and
# is a silent no-op under pandas copy-on-write).
df = (
    raw_df
    # 'Not assigned' is the page's placeholder for missing values.
    .replace({'Not assigned': None})
    # Postal codes without a borough are dropped.
    .dropna(subset=['Borough'])
    # A missing neighbourhood falls back to the borough name.
    .assign(Neighbourhood=lambda frame: frame['Neighbourhood'].fillna(frame['Borough']))
    .reset_index(drop=True)
)
df.head(10)

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
5,M9A,Etobicoke,"Islington Avenue, Humber Valley Village"
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


Now we can check the first 10 rows of our dataset, and the next cell will show the shape of the dataset. This closes part 1.

In [4]:
# (rows, columns) of the cleaned postal-code table.
df.shape

(103, 3)

## Part 2 - Adding geospatial data
-----
In this part I add geospatial data to the dataset. Unfortunately I was unable to use geocoder, so using a CSV file is necessary.

In [5]:
# Coordinates for each postal code, indexed by 'Postal Code' for the join.
geo_coords = pd.read_csv('Geospatial_Coordinates.csv').set_index('Postal Code')

df = (
    df
    # Drop coordinate columns left by a previous run of this cell; without
    # this, re-running the cell raises on overlapping column names.
    .drop(columns=geo_coords.columns, errors='ignore')
    .join(geo_coords, on='Postal Code')
)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494


## Part 3 - Final segmentation and clustering
-------
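In this part I will plot the neighbourhoods on a folium map and query the Foursquare API for nearby venues, as groundwork for segmenting and clustering the neighbourhoods.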

In [6]:
import sklearn
import folium

In [18]:
# Map centre: midpoint of the bounding box spanned by all coordinates.
lat_col, lon_col = df['Latitude'], df['Longitude']
longitude = (lon_col.max() + lon_col.min()) / 2
latitude = (lat_col.max() + lat_col.min()) / 2

In [69]:
# Fixed-size figure hosting the map, centred on the coordinates computed earlier.
f = folium.Figure(width=1600, height=800)
my_map = folium.Map(location=[latitude, longitude], zoom_start=12).add_to(f)

# Collect one circle marker per postal-code row in a single feature group.
hoods = folium.map.FeatureGroup()
for borough, neighbourhood, lat, lng in zip(
    df['Borough'], df['Neighbourhood'], df['Latitude'], df['Longitude']
):
    # parse_html belongs on the Popup (CircleMarker does not use it). With
    # parse_html=True the label is escaped and rendered as plain text, so
    # names containing quotes such as "Queen's Park" cannot break the popup.
    popup = folium.Popup('{}, {}'.format(borough, neighbourhood), parse_html=True)
    hoods.add_child(
        folium.CircleMarker([lat, lng], radius=5, popup=popup)
    )

my_map.add_child(hoods)
display(f)

In [72]:
# Foursquare API credentials are read from the environment so that secrets are
# never stored in the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
# NOTE(review): the key pair that used to be hardcoded here should be treated
# as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
ACCESS_TOKEN = os.environ.get('FOURSQUARE_ACCESS_TOKEN', '')  # your FourSquare Access Token
VERSION = '20210101'  # sent as the API's `v` parameter
LIMIT = 30  # sent as the API's `limit` parameter

if not (CLIENT_ID and CLIENT_SECRET):
    print('FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set.')

In [83]:
search_query = 'M3A'
radius = 500  # sent as the API's `radius` parameter

# Let requests build and URL-encode the query string instead of formatting it
# by hand; `url` now holds only the endpoint.
url = 'https://api.foursquare.com/v2/venues/search'
# NOTE(review): both `ll` (the map centre) and `near` are sent, exactly as
# before; confirm which of the two the search is meant to use.
params = {
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
    'll': '{},{}'.format(latitude, longitude),
    'v': VERSION,
    'near': search_query,
    'radius': radius,
    'limit': LIMIT,
}

# Fail fast on a hung connection or an HTTP error status rather than parsing
# an error response as if it were a result.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
results = response.json()

In [84]:
# Show the decoded JSON returned by the search request above.
results

imary': True}],
    'referralId': 'v-1609791112',
    'hasPerk': False},
   {'id': '4f3a69f9e4b024185be5a99b',
    'name': '17 Brookbanks Drive',
    'location': {'address': '15 Brookbanks Dr.',
     'lat': 43.752265506639944,
     'lng': -79.33232199236224,
     'labeledLatLngs': [{'label': 'display',
       'lat': 43.752265506639944,
       'lng': -79.33232199236224}],
     'distance': 5801,
     'postalCode': 'M3A 2S9',
     'cc': 'CA',
     'city': 'Toronto',
     'state': 'ON',
     'country': 'Canada',
     'formattedAddress': ['15 Brookbanks Dr.',
      'Toronto ON M3A 2S9',
      'Canada']},
    'categories': [{'id': '4d954b06a243a5684965b473',
      'name': 'Residential Building (Apartment / Condo)',
      'pluralName': 'Residential Buildings (Apartments / Condos)',
      'shortName': 'Residential',
      'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/apartment_',
       'suffix': '.png'},
      'primary': True}],
    'venuePage': {'id': '599166673'},
    

In [70]:
# Overlay the GeoJSON boundaries on the existing map. Map.choropleth() is
# deprecated and absent from newer folium releases; folium.Choropleth is the
# supported replacement and takes the same arguments.
folium.Choropleth(
    geo_data='simple.geojson',  # thanks http://adamw523.com/toronto-geojson/
    fill_opacity=0.7,
    line_opacity=0.2,
).add_to(my_map)

display(f)