# Segmenting and Clustering Neighborhoods in Toronto

## Part 1 - Generate the dataframe

### Download and import libraries

In [1]:
!pip install bs4
!pip install geocoder

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

### Download the data
- Download the data from Wikipedia: https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M
- Create a `BeautifulSoup` object from the downloaded page

In [3]:
# Fetch the Wikipedia page. The timeout stops the cell from hanging forever on
# a stalled connection, and raise_for_status() aborts on HTTP 4xx/5xx so an
# error page is never parsed as if it were the postal-code table.
data_info = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,
)
data_info.raise_for_status()

# Parse the HTML with the html5lib parser.
soup = BeautifulSoup(data_info.text, 'html5lib')

### Create dataframe

- First, create an empty list.

- After locating the table and its data cells, create a dictionary called `cell` for each data cell, with three keys: PostalCode, Borough and Neighborhood.

- Because the postal code is the first 3 characters of the cell text, extract it with `row.p.text[:3]`.

- Next, use the `split`, `strip` and `replace` string methods to get the Borough and Neighborhood information.

- Append the dictionary to the list.

- Create a dataframe from the list.



In [4]:
# Scrape the first table on the page into a list of records (one per <td>
# cell), then build the dataframe from that list in a single call.
table_contents = []
table = soup.find('table')
if table is None:
    raise ValueError("No <table> element found in the downloaded page")

for row in table.find_all('td'):
    # Skip cells that lack the expected <p>/<span> markup, as well as postal
    # codes flagged as 'Not assigned'.
    if row.p is None or row.span is None or row.span.text == 'Not assigned':
        continue

    # The span text appears to have the form "Borough(Neighborhood / ...)":
    # everything before the first '(' is the borough, the next segment holds
    # the neighborhoods. Guard against text that has no '(' at all.
    span_parts = row.span.text.split('(')
    raw_neighborhood = span_parts[1] if len(span_parts) > 1 else ''

    cell = {}
    cell['PostalCode'] = row.p.text[:3]
    cell['Borough'] = span_parts[0]
    cell['Neighborhood'] = (
        raw_neighborhood.strip(')').replace(' /', ',').replace(')', ' ').strip(' ')
    )
    table_contents.append(cell)

# Passing the column names explicitly keeps the expected schema even when no
# rows were scraped, so the 'Borough' lookup below cannot raise a KeyError.
df = pd.DataFrame(table_contents, columns=['PostalCode', 'Borough', 'Neighborhood'])

# Some borough strings come back with extra text fused onto the name; map
# those exact strings to readable labels.
df['Borough'] = df['Borough'].replace({
    'Downtown TorontoStn A PO Boxes25 The Esplanade': 'Downtown Toronto Stn A',
    'East TorontoBusiness reply mail Processing Centre969 Eastern': 'East Toronto Business',
    'EtobicokeNorthwest': 'Etobicoke Northwest',
    'East YorkEast Toronto': 'East York/East Toronto',
    'MississaugaCanada Post Gateway Processing Centre': 'Mississauga',
})

### Dataframe

In [5]:
# Preview the first 15 rows of the scraped postal-code dataframe.
df.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Queen's Park,Ontario Provincial Government
5,M9A,Etobicoke,Islington Avenue
6,M1B,Scarborough,"Malvern, Rouge"
7,M3B,North York,Don Mills North
8,M4B,East York,"Parkview Hill, Woodbine Gardens"
9,M5B,Downtown Toronto,"Garden District, Ryerson"


### Use the `.shape` attribute to print the number of rows and columns of your dataframe

In [6]:
# (rows, columns) of the scraped postal-code dataframe.
df.shape

(103, 3)

## Part 2 - Extracting Latitude and Longitude


*I had some problems with geocoder, so I used the csv provided in the assignment*

In [7]:
# Location of the course-provided CSV holding one latitude/longitude pair per
# postal code (adjacent string literals are concatenated into a single URL).
link = (
    "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/"
    "IBMDeveloperSkillsNetwork-DS0701EN-SkillsNetwork/labs_v1/"
    "Geospatial_Coordinates.csv"
)

# Read the CSV straight from the URL and show the first five rows.
df_coord = pd.read_csv(filepath_or_buffer=link)
df_coord.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [8]:
# Rename 'Postal Code' to 'PostalCode' so the key column has the same name as
# in the scraped dataframe. The result is rebound instead of using
# inplace=True, so the frame displayed by the previous cell is not mutated.
df_coord = df_coord.rename(columns={'Postal Code': 'PostalCode'})
df_coord.head(5)

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Use the `.shape` attribute to print the number of rows and columns of your dataframe

In [9]:
# (rows, columns) of the coordinates dataframe.
df_coord.shape

(103, 3)

### Merge both dataframes

In [10]:
# Inner-join the scraped table with the coordinates on the shared 'PostalCode'
# key. validate='one_to_one' makes pandas raise a MergeError if either side
# contains duplicate postal codes, instead of silently multiplying rows.
df_new = pd.merge(df, df_coord, how='inner', on='PostalCode', validate='one_to_one')
df_new.head(15)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Queen's Park,Ontario Provincial Government,43.662301,-79.389494
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
7,M3B,North York,Don Mills North,43.745906,-79.352188
8,M4B,East York,"Parkview Hill, Woodbine Gardens",43.706397,-79.309937
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937


### Use the `.shape` attribute to print the number of rows and columns of your dataframe

In [11]:
# (rows, columns) of the merged dataframe.
df_new.shape

(103, 5)

## Part 3 - Explore and cluster the neighborhoods in Toronto.

### Download and import libraries

In [12]:
!pip install folium

  from cryptography.utils import int_from_bytes
  from cryptography.utils import int_from_bytes


In [13]:
from geopy.geocoders import Nominatim # To get Toronto location
import folium

### Dataframe with only boroughs that contain the word Toronto 

In [14]:
# Boolean mask: True where the borough name contains the word 'Toronto'.
is_toronto = df_new['Borough'].str.contains('Toronto')

# Keep only the matching rows (the original index labels are preserved).
toronto_df = df_new.loc[is_toronto]
toronto_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.65426,-79.360636
9,M5B,Downtown Toronto,"Garden District, Ryerson",43.657162,-79.378937
15,M5C,Downtown Toronto,St. James Town,43.651494,-79.375418
19,M4E,East Toronto,The Beaches,43.676357,-79.293031
20,M5E,Downtown Toronto,Berczy Park,43.644771,-79.373306


In [15]:
# (rows, columns) of the Toronto-only dataframe.
toronto_df.shape

(39, 5)

### Draw Toronto map

In [16]:
# Look up Toronto's coordinates with Nominatim and centre the base map there.
geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode("Toronto, ON")

# geocode() yields None when the query has no match; fail with a clear message
# instead of an AttributeError on `.latitude` below.
if location is None:
    raise ValueError("Nominatim returned no result for 'Toronto, ON'")

latitude = location.latitude
longitude = location.longitude
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)
toronto_map

### Add markers to the map from the dataframe with only boroughs that contain the word Toronto 

In [17]:
# Shared appearance for every marker: small blue circle, semi-transparent
# white fill.
marker_style = dict(
    radius=5,
    color='blue',
    fill=True,
    fill_color='white',
    fill_opacity=0.5,
)

# Collect one circle marker per row of the Toronto-only dataframe in a single
# feature group, then attach the whole group to the map.
incidents = folium.FeatureGroup()
for lat, lng in zip(toronto_df['Latitude'], toronto_df['Longitude']):
    incidents.add_child(folium.CircleMarker([lat, lng], **marker_style))

toronto_map.add_child(incidents)