# Segmenting and Clustering Neighborhoods in Toronto 

## Loading Libraries 

In [14]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [15]:
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
from sklearn.cluster import KMeans

## Loading the Wikipedia page as text

In [16]:
# Download the Wikipedia list of Canadian postal codes beginning with "M" and parse the HTML.
# The timeout stops the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# page be parsed as if it were the postal-code table.
response = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
response.raise_for_status()
source = response.text
soup = BeautifulSoup(source, 'lxml')

In [17]:
# Extract (postcode, borough, neighborhood) triples from the first table on the page.
table_can_zipinfo = soup.find('table')
if table_can_zipinfo is None:
    raise ValueError('No <table> element found in the downloaded page.')

postcode = []
borough = []
neighborhood = []

# Parse row by row instead of striding through one flat list of every <td>:
# a flat stride of 3 raises IndexError when the cell count is not a multiple
# of 3, and a single irregular row would shift every later value into the
# wrong column.
for row in table_can_zipinfo.find_all('tr'):
    cells = row.find_all('td')
    if len(cells) != 3:
        # Rows without exactly three data cells (e.g. a header row made of <th>) carry no record.
        continue
    postcode.append(cells[0].text.strip())
    borough.append(cells[1].text.strip())
    neighborhood.append(cells[2].text.strip())

if not postcode:
    # Fail here with a clear message rather than building an empty frame downstream.
    raise ValueError('No three-column rows found; the page layout may have changed.')

### Create a dataframe with Postcode, Borough, and Neighborhood columns

In [18]:
# Assemble one row per scraped record, naming the three columns up front,
# then preview the first rows.
scraped_rows = list(zip(postcode, borough, neighborhood))
df_canada_postcode = pd.DataFrame(scraped_rows, columns=['Postcode', 'Borough', 'Neighborhood'])
df_canada_postcode.head()

Unnamed: 0,Postcode,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Cleanse the data and transform as per the given requirements

In [19]:
# Keep only postcodes that have a borough assigned. Filtering into a fresh copy
# (rather than dropping in place) keeps this cell safe to re-run and avoids
# writing into a view in the assignment below.
has_borough = df_canada_postcode['Borough'] != 'Not assigned'
df_canada_postcode = df_canada_postcode[has_borough].copy()

# Where a borough exists but the neighborhood is unassigned, fall back to the
# borough name. The head() preview of the scraped table shows unassigned
# neighborhoods as empty cells, so an empty string is treated the same as the
# literal 'Not assigned'.
needs_neighborhood = df_canada_postcode['Neighborhood'].isin(['Not assigned', ''])
df_canada_postcode.loc[needs_neighborhood, 'Neighborhood'] = df_canada_postcode.loc[needs_neighborhood, 'Borough']

### Group the data by Postcode & Borough

In [26]:
# Collapse rows sharing a (Postcode, Borough) pair into a single row whose
# Neighborhood value is the comma-separated list of that pair's neighborhoods.
neighborhoods_by_postcode = df_canada_postcode.groupby(['Postcode', 'Borough'])['Neighborhood']
df_grp_canada = neighborhoods_by_postcode.agg(', '.join).reset_index()
df_grp_canada.columns = ['Postcode', 'Borough', 'Neighborhood']

### Read the Geospatial csv file

In [27]:
# Load the postcode coordinates, replacing the file's own header row with the
# column names used throughout this notebook.
df_latlng = pd.read_csv(
    'http://cocl.us/Geospatial_data',
    header=0,
    names=['Postcode', 'Latitude', 'Longitude'],
)

In [28]:
# Attach coordinates to each grouped postcode; the inner join keeps only
# postcodes present in both tables.
df_join = df_grp_canada.merge(df_latlng, how='inner', on='Postcode')

### Explore and cluster the neighborhoods in Toronto.

In [29]:
# Take an independent copy of the columns needed for exploration, then preview it.
neighborhoods = df_join.loc[:, ['Borough', 'Neighborhood', 'Latitude', 'Longitude']].copy()
neighborhoods.head()

Unnamed: 0,Borough,Neighborhood,Latitude,Longitude
0,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,Scarborough,Woburn,43.770992,-79.216917
4,Scarborough,Cedarbrae,43.773136,-79.239476


In [30]:
# Report the number of distinct Borough values and the number of rows in the frame.
borough_count = neighborhoods['Borough'].nunique(dropna=False)
row_count = len(neighborhoods)
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

The dataframe has 10 boroughs and 103 neighborhoods.


 ### First part completed

### Second part started

In [31]:
# Rebuild the working frame, this time keeping the Postcode column as well,
# and preview the first 20 rows.
neighborhoods = df_join.loc[:, ['Postcode', 'Borough', 'Neighborhood', 'Latitude', 'Longitude']].copy()
neighborhoods.head(n=20)

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"Kennedy Park, Ionview, East Birchmount Park",43.727929,-79.262029
7,M1L,Scarborough,"Golden Mile, Clairlea, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffside, Cliffcrest, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


### Second part ended

### 3rd Part started

In [32]:
from geopy.geocoders import Nominatim

In [33]:
# Look up the coordinates of Toronto; they are used to centre the map.
address = 'Toronto, Canada'
# An explicit user agent identifies this notebook to the Nominatim service;
# recent geopy releases refuse to build a Nominatim geocoder without one.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

  


The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [34]:
import folium

In [35]:
# Base map centred on the Toronto coordinates.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row. The loop variables carry a `marker_` prefix so
# they do not overwrite the `borough` / `neighborhood` lists built by the
# scraping cell, which would break that part of the notebook on a partial re-run.
marker_rows = zip(
    neighborhoods['Latitude'],
    neighborhoods['Longitude'],
    neighborhoods['Borough'],
    neighborhoods['Neighborhood'],
)
for lat, lng, marker_borough, marker_neighborhood in marker_rows:
    label = '{}, {}'.format(marker_neighborhood, marker_borough)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Display the map that carries the markers. Building a new folium.Map here
# would render an empty map instead.
map_toronto

### 3rd part ended 

In [None]:
# Stray bare name `s` removed: it is not defined in any cell visible here and
# raised NameError on Restart & Run All.