# Segmenting and Clustering Neighborhoods in Toronto

### Source of the data: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M

## Scraping the webpage for data

In [135]:
#Importing libraries needed
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Download the Wikipedia page and parse its first HTML table with pandas.
req = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # do not hang the notebook forever if the site is unreachable
)
req.raise_for_status()  # stop here on an HTTP error instead of parsing an error page
soup = BeautifulSoup(req.content,'lxml')
table = soup.find_all('table')[0]
data = pd.read_html(str(table))[0]

# Build the working dataframe from the parsed table.
# The previous version referenced the undefined names Postcode/Borough/Neighborhood
# and never used `data`, so it could not run on a fresh kernel.
# NOTE(review): assumes the first three table columns are postcode, borough and
# neighborhood, in that order -- confirm against the live page.
df = data.iloc[:, :3].copy()
df.columns = ['Postcode', 'Borough', 'Neighborhood']
df.head(3)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods


In [136]:
# Show the dtype of each column of df.
df.dtypes

Postcode        object
Borough         object
Neighborhood    object
dtype: object

In [137]:
# Keep only the rows whose Borough value is something other than 'Not assigned';
# rows without an assigned borough are dropped from df.
has_borough = df['Borough'] != 'Not assigned'
df = df[has_borough]
df.head()

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [138]:
# Combine the neighborhoods that share a postcode into one row per postcode.
# `post` holds the distinct postcodes in order of first appearance.
post = df['Postcode'].unique()

# For every postcode, the index labels of the rows of df that carry it.
nb = [df.index[df['Postcode'] == code].values for code in post]

# Borough is taken from the first row of each postcode group; the neighborhoods
# of the group are collected into one string array.
# (The previous loop iterated over the undefined name `NH_idx`; the list built
# above is `nb`.)
bs = [df['Borough'].loc[rows[0]] for rows in nb]
ns = [df['Neighborhood'].loc[rows].values.astype('str') for rows in nb]

# Create the new dataframe with one row per postcode.
df = pd.DataFrame({'Postcode': post, 'Borough': bs, 'Neighborhood': ns})
df.head(12)

Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,[Parkwoods]
1,M4A,North York,[Victoria Village]
2,M5A,Downtown Toronto,[Harbourfront]
3,M6A,North York,"[Lawrence Heights, Lawrence Manor]"
4,M7A,Downtown Toronto,[Queen's Park]
5,M9A,Queen's Park,[Not assigned]
6,M1B,Scarborough,"[Rouge, Malvern]"
7,M3B,North York,[Don Mills North]
8,M4B,East York,"[Woodbine Gardens, Parkview Hill]"
9,M5B,Downtown Toronto,"[Ryerson, Garden District]"


In [139]:
# Shape of the combined per-postcode dataframe.
# (`result` was never defined in this notebook; the combined frame is `df`.)
df.shape

(103, 3)

# Part 2: Latitude and longitude coordinates of each neighborhood

In [140]:
# Read the coordinates table from a local CSV file. The file had to be downloaded
# through a US proxy beforehand so that the data could be accessed locally.
geo = pd.read_csv('Geospatial_Coordinates.csv')
geo.head()
    

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [141]:
# Show the dtype of each column of geo.
geo.dtypes

Postal Code     object
Latitude       float64
Longitude      float64
dtype: object

In [152]:
# Rename 'Postal Code' to 'Postcode' so the key column has the same name in df and geo.
# Renaming by name (rather than assigning a positional list to geo.columns) cannot
# mislabel columns if the CSV column order changes, and is a no-op if the cell is
# run a second time.
geo = geo.rename(columns={'Postal Code': 'Postcode'})
geo.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [153]:
# Merge the borough/neighborhood table with the coordinates table on Postcode.
# The join key is named explicitly; the earlier set_index() calls are removed
# because their results were discarded and therefore had no effect on the merge.
toronto_data = pd.merge(df, geo, on='Postcode', how='inner')
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,[Parkwoods],43.753259,-79.329656
1,M4A,North York,[Victoria Village],43.725882,-79.315572
2,M5A,Downtown Toronto,[Harbourfront],43.65426,-79.360636
3,M6A,North York,"[Lawrence Heights, Lawrence Manor]",43.718518,-79.464763
4,M7A,Downtown Toronto,[Queen's Park],43.662301,-79.389494


In [144]:
# Install folium (pinned to 0.5.0, from the conda-forge channel) for map visualization.
# `--yes` accepts the install plan without an interactive prompt.
!conda install -c conda-forge folium=0.5.0 --yes

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... failed with initial frozen solve. Retrying with flexible solve.
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\Janis\Anaconda3

  added / updated specs:
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.0.0               |             py_0         606 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    certifi-2019.9.11          |           py37_0         147 KB  conda-forge
    conda-4.8.0                |           py37_1         3.0 MB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-fo

In [156]:
# Preview the first rows of the merged toronto_data frame.
toronto_data.head()

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,[Parkwoods],43.753259,-79.329656
1,M4A,North York,[Victoria Village],43.725882,-79.315572
2,M5A,Downtown Toronto,[Harbourfront],43.65426,-79.360636
3,M6A,North York,"[Lawrence Heights, Lawrence Manor]",43.718518,-79.464763
4,M7A,Downtown Toronto,[Queen's Park],43.662301,-79.389494


In [159]:
#installing geopy
!pip install geopy

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/80/93/d384479da0ead712bdaf697a8399c13a9a89bd856ada5a27d462fb45e47b/geopy-1.20.0-py2.py3-none-any.whl (100kB)
Collecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.20.0


# Toronto Coordinates

In [195]:
#Importing needed libraries
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans
import folium
address = 'Toronto, ON'

# Look up the coordinates of Toronto with the Nominatim geocoder.
geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when the address cannot be resolved; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude_toronto = location.latitude
longitude_toronto = location.longitude
print(' {}, {}.'.format(latitude_toronto, longitude_toronto))

 43.653963, -79.387207.


# Map of Toronto

In [196]:
# Create a map centered on the Toronto coordinates.
map_toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=10)

# Add one circle marker per row of toronto_data; the popup shows the row's
# borough and neighborhood(s).
for lat, lng, borough, neighborhood in zip(
        toronto_data['Latitude'],
        toronto_data['Longitude'],
        toronto_data['Borough'],
        toronto_data['Neighborhood']):
    # Use the loop variables here. The previous version formatted the names
    # `Borough` / `Neighborhood`, which are not the loop variables.
    label = '{}, {}'.format(borough, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3.5,
        popup=label,
        color='yellow',
        fill=True,
        fill_color='#cc5a31',
        fill_opacity=0.5,
        parse_html=False).add_to(map_toronto)
map_toronto

# Using Foursquare to show nearby venues

In [185]:
# Foursquare details.
# Credentials are read from the environment so that secrets are not stored in
# (or shared with) the notebook. Set FOURSQUARE_CLIENT_ID and
# FOURSQUARE_CLIENT_SECRET before starting the kernel.
import os

CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not (CLIENT_ID and CLIENT_SECRET):
    print('Warning: FOURSQUARE_CLIENT_ID / FOURSQUARE_CLIENT_SECRET are not set; '
          'Foursquare requests will be rejected.')

VERSION = '20180605'  # API version date sent as the `v` query parameter
radius=500            # search radius sent as the `radius` query parameter
LIMIT=100             # value sent as the `limit` query parameter
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare explore endpoint around each location and tabulate the venues.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        One entry per neighborhood; they are iterated together with zip().
    radius : int, default 500
        Value sent as the `radius` query parameter of each request.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'.
        The frame is empty (but has these columns) when nothing is returned.

    Raises
    ------
    requests.HTTPError
        If Foursquare answers a request with an HTTP error status.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']
    venues = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,CLIENT_SECRET,VERSION,lat,lng,radius,LIMIT)

        # Send the request. The previous version built the URL but never sent it,
        # so no venue was ever collected.
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        # NOTE(review): assumes the v2 explore payload layout
        # response -> groups[0] -> items[] -> venue{name, location{lat,lng}, categories[]}
        # -- confirm against the Foursquare API reference.
        groups = response.json()['response'].get('groups') or []
        items = groups[0]['items'] if groups else []
        for item in items:
            venue = item['venue']
            categories = venue.get('categories') or []
            venues.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Build the dataframe with each neighborhood and its venue information.
    # Passing `columns` to the constructor also works when `venues` is empty,
    # where assigning .columns afterwards would raise.
    nearby_venues = pd.DataFrame(venues, columns=columns)
    return nearby_venues


# Collect the venues for every row of toronto_data; the cells below read `toronto_venues`.
toronto_venues = getNearbyVenues(names=toronto_data['Neighborhood'],
                                 latitudes=toronto_data['Latitude'],
                                 longitudes=toronto_data['Longitude'])


In [186]:
# Preview the first rows of the venues table.
toronto_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,[Parkwoods],43.753259,-79.329656,Brookbanks Park,43.751976,-79.33214,Park
1,[Parkwoods],43.753259,-79.329656,649 Variety,43.754513,-79.331942,Convenience Store
2,[Parkwoods],43.753259,-79.329656,Variety Store,43.751974,-79.333114,Food & Drink Shop
3,[Victoria Village],43.725882,-79.315572,Victoria Village Arena,43.723481,-79.315635,Hockey Arena
4,[Victoria Village],43.725882,-79.315572,Tim Hortons,43.725517,-79.313103,Coffee Shop


In [187]:
# (rows, columns) of the venues table.
toronto_venues.shape

(2227, 7)