# Segmenting and Clustering Neighborhoods in Toronto 
## Part 1

In [1]:
import pandas as pd


<p>Read the tables from the Wikipedia page into a list of DataFrames, then take the first one as our working DataFrame.</p>

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns one DataFrame per HTML table found on the page;
# header=0 uses the first row of each table as its column names.
tables = pd.read_html(wiki_url, header=0)
print(type(tables))

# The first table on the page is the Postcode / Borough / Neighborhood table.
table = pd.DataFrame(tables[0])
table.head()


<class 'list'>


Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


<p>Only process the cells that have an assigned borough. Ignore cells with a borough that is <b>Not assigned.</b></p>

In [3]:
# Drop every row whose borough is the 'Not assigned' placeholder.
has_borough = table['Borough'] != 'Not assigned'
table = table[has_borough]

<p>Extract the unique postcodes into their own DataFrame.</p>

In [4]:
# One-column frame holding each postcode once, in order of first appearance.
colname = ['Postcode']
uniquepost = table['Postcode'].unique()
df = pd.DataFrame(uniquepost, columns=colname)
df.head()

Unnamed: 0,Postcode
0,M3A
1,M4A
2,M5A
3,M6A
4,M7A


<p>If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.</p>

In [5]:
# Where a row has a borough but its neighborhood is 'Not assigned', use the
# borough name as the neighborhood.
#
# This is done with a vectorised mask instead of assigning to the rows yielded
# by iterrows(): those rows may be copies, so writes to them are not
# guaranteed to reach `table`. assign() returns a new frame, which also avoids
# writing into a filtered slice of the original table.
unassigned = table['Neighborhood'] == 'Not assigned'
table = table.assign(
    Neighborhood=table['Neighborhood'].mask(unassigned, table['Borough'])
)

<p>Combine all neighborhoods that share a postcode into a single cell, separated by commas.</p>

In [6]:
# Collapse `table` to one row per postcode:
#   * Borough      -> the borough of the last row seen for that postcode
#   * Neighborhood -> every neighborhood of that postcode joined with ', '
#
# A single groupby replaces the nested iterrows() scan (quadratic in the number
# of rows) and the row-by-row DataFrame.append, which no longer exists in
# pandas 2.x. sort=False keeps postcodes in order of first appearance, i.e.
# the same order as table.Postcode.unique().
colname = ['Postcode', 'Borough', 'Neighborhood']
finaldf = (
    table
    .groupby('Postcode', sort=False)
    .agg({'Borough': 'last', 'Neighborhood': ', '.join})
    .reset_index()[colname]
)

In [7]:
# After grouping, every postcode must appear exactly once.
assert finaldf['Postcode'].is_unique, 'expected one row per postcode'

print(finaldf.shape)
# Show a preview rather than dumping the whole frame into the notebook.
finaldf.head(10)

(103, 3)


Unnamed: 0,Postcode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park
5,M9A,Downtown Toronto,Queen's Park
6,M1B,Scarborough,"Rouge, Malvern"
7,M3B,North York,Don Mills North
8,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,M5B,Downtown Toronto,"Ryerson, Garden District"


# Segmenting and Clustering Neighborhoods in Toronto 
## Part 2

In [None]:
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab

In [8]:
from geopy import geocoders
import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

<p>The following code raises an error because the geocoder's postcode data is out of date and many postcodes are missing, so it is left commented out.</p>

In [None]:
#from geopy import geocoders
#g = geocoders.Nominatim(user_agent="my-application")
#for index,data in df.iterrows():
#    strlocation = '{}, Toronto, Ontario'.format(data['Postcode'])
#    print(strlocation)
#    location = g.geocode(strlocation)
    #print(location.latitude, location.longitude)
    #data['Latitude'] = location.latitude
    #data['Longitude'] = location.longitude
#

<p>For that reason, I use the second option: load a CSV file containing the coordinates of every postcode and add them to our DataFrame.</p>

In [9]:
# Load the latitude/longitude lookup table (one row per postal code).
url = 'http://cocl.us/Geospatial_data'
df_pcodes = pd.read_csv(url)

# Rename the columns positionally so the key column is called 'Postcode',
# matching the column name used in the neighborhood table.
df_pcodes.columns = ['Postcode', 'Latitude', 'Longitude']
df_pcodes.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


<p>We merge the two DataFrames using the postcode as the join key.</p>

In [11]:
# Attach Latitude/Longitude to every postcode row of finaldf.
#
# An inner join is used instead of how='right': a right join keeps rows that
# exist only in the coordinates file (leaving NaN Borough/Neighborhood) and,
# on current pandas, orders the result by the coordinates file instead of by
# finaldf. The inner join keeps finaldf's row order and only rows that have
# both a neighborhood and a coordinate pair.
df_neighbor_code = pd.merge(finaldf, df_pcodes, how='inner', on='Postcode')
df_neighbor_code.head(15)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,Harbourfront,43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494
5,M9A,Downtown Toronto,Queen's Park,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.706397,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.657162,-79.378937


# Segmenting and Clustering Neighborhoods in Toronto 
## Part 3

In [15]:
# Install folium 0.5.0 from the conda-forge channel, then import it.
# NOTE(review): despite the trailing remark on the install line, that line is
# NOT commented out -- the install runs every time this cell is executed.
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-4.0.0               |             py_0         606 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         704 KB

The following NEW packages will be INSTALLED:

    altair:  4.0.0-py_0 conda-forge
    branca:  0.3.1-py_0 conda-forge
    folium:  0.5.0-py_0 conda-forge
    vincent: 0.4.4-py_1 conda-forge


Downloading and Extracting Packages
vincent-0.4.4        | 28 KB     | #####

In [20]:
# Create the base map, centred on a fixed latitude/longitude pair.
map_toronto = folium.Map(location=[43.7184038, -79.5181445], zoom_start=10)

# Add one circle marker per postcode row, with a "<neighborhood> - <borough>"
# popup.
for lat, lng, borough, neighborhood in zip(df_neighbor_code['Latitude'],
                                           df_neighbor_code['Longitude'],
                                           df_neighbor_code['Borough'],
                                           df_neighbor_code['Neighborhood']):
    # parse_html=True makes folium escape the label text, so names containing
    # quotes (e.g. "Queen's Park") do not break the generated HTML.
    popup = folium.Popup('{} - {}'.format(neighborhood, borough), parse_html=True)
    # parse_html is a Popup option, not a CircleMarker one, so it is passed
    # only to the Popup above.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)

map_toronto