# WEEK 3 ASSIGNMENT PART3

## Segmenting and Clustering Neighborhoods in Toronto

Step 1: Load the data to the dataframe

Step 2: Process the data

    2.1. Ignore rows with a Borough that is "Not assigned"
    
    2.2. Group neighborhoods by postcode
    
    2.3. Name "Not assigned" neighbourhood after the Borough
    
Step 3. Load and include the geospatial data


### Step 1: Load the data to the dataframe

In [2]:
#Load necessary libraries
import pandas as pd
import requests

In [3]:
from bs4 import BeautifulSoup
url="https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
soup = BeautifulSoup(requests.get(url).text,'lxml')
My_table = soup.find('table',{'class':'wikitable sortable'})

My_table
table_rows = My_table.find_all('tr')

t=[]
for tr in table_rows:
    td = tr.find_all('td')
    row = [tr.text.rstrip('\n') for tr in td]
    t.append(row)
df=pd.DataFrame(t)

df.columns=['Postcode','Borough','Neighbourhood']

df.drop([0],axis=0,inplace=True)

df.reset_index()

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront


### Step 2. Process the data

In [4]:
df.drop(df[df['Borough'] == 'Not assigned'].index, inplace = True)

#re-index the dataframe
df = df.reset_index(drop=True)

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M5A,Downtown Toronto,Regent Park
4,M6A,North York,Lawrence Heights


### Group neighborhoods by postcode

In [7]:
df_part3 = df.groupby("Postcode").agg(lambda x:', '.join(set(x)))
df_part3 = df_new.reset_index()
df_part3.loc[7:14]

Unnamed: 0,index,Postcode,Borough,Neighbourhood
7,7,M1L,Scarborough,"Golden Mile, Oakridge, Clairlea"
8,8,M1M,Scarborough,"Cliffside, Scarborough Village West, Cliffcrest"
9,9,M1N,Scarborough,"Birch Cliff, Cliffside West"
10,10,M1P,Scarborough,"Scarborough Town Centre, Wexford Heights, Dors..."
11,11,M1R,Scarborough,"Wexford, Maryvale"
12,12,M1S,Scarborough,Agincourt
13,13,M1T,Scarborough,"Sullivan, Clarks Corners, Tam O'Shanter"
14,14,M1V,Scarborough,"Agincourt North, L'Amoreaux East, Steeles East..."


### Not assigned neighbourhood after the Borough

In [8]:
for index, row in df_part3.iterrows():
    if df_part3.loc[index, 'Neighbourhood'] == "Not assigned":
        df_part3.loc[index, 'Neighbourhood'] = df_part3.loc[index, 'Borough']
        print(index)

85


### Dataframe Shape

In [9]:

df_part3.shape

(103, 4)

### Step 3. Load and include the geospatial data

In [10]:
read_data = pd.read_csv("https://cocl.us/Geospatial_data")
read_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [11]:
df_read = df_new
df_read['Latitude'] = read_data['Latitude'].values
df_read['Longitude'] = read_data['Longitude'].values
df_read.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Port Union, Rouge Hill, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, West Hill, Morningside",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [12]:
df_read.shape

(103, 5)

## Result : Visualize the data on map

In [14]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium # plotting library

print('Folium installed')


Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/DSX-Python35

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    altair-2.2.2               |           py35_1         462 KB  conda-forge
    certifi-2018.8.24          |        py35_1001         139 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.0.2r             |       h14c3975_0         3.1 MB  conda-forge
    ca-certificates-2019.3.9   |       hecc5488_0         146 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         4.0 MB

The following NEW packages will

### Displaying all of the Boroughs and Neighbourhoods

In [18]:
lat_center = df_read['Latitude'].mean()
lon_center = df_read['Longitude'].mean()
#print("The center of the map is: (x: %s, y: %s)" % (lat_center, lon_center))

#Draw the map
map_all = folium.Map(location=[lat_center, lon_center], tiles='cartodbpositron', zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood, postcode in zip(df_read['Latitude'], df_read['Longitude'], df_read['Borough'], df_read['Neighbourhood'], df_read['Postcode']):
    label = '{}, {}, {}'.format(neighborhood, borough, postcode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6).add_to(map_all)
    
# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [lat_center, lon_center],
    radius=10,
    color='red',
    popup='Center of the map',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(map_all)

# display map
map_all

### Displaying all Boroughs that contains the word "Toronto

In [20]:
df_toronto = df_read.set_index('Borough').filter(like='Toronto', axis=0)
df_toronto = df_toronto.reset_index()
df_toronto.head()

Unnamed: 0,Borough,Postcode,Neighbourhood,Latitude,Longitude
0,East Toronto,M4E,The Beaches,43.676357,-79.293031
1,East Toronto,M4K,"The Danforth West, Riverdale",43.679557,-79.352188
2,East Toronto,M4L,"The Beaches West, India Bazaar",43.668999,-79.315572
3,East Toronto,M4M,Studio District,43.659526,-79.340923
4,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879


In [21]:
lat_center_t = df_toronto['Latitude'].mean()
lon_center_t = df_toronto['Longitude'].mean()

#Draw the map
map_toronto = folium.Map(location=[lat_center_t, lon_center_t], tiles='cartodbpositron', zoom_start=12)

# add markers to map
for lat, lng, borough, neighborhood, postcode in zip(df_toronto['Latitude'], df_toronto['Longitude'], df_toronto['Borough'], df_toronto['Neighbourhood'], df_toronto['Postcode']):
    label = '{}, {}, {}'.format(neighborhood, borough, postcode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6).add_to(map_toronto)
    
# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [lat_center_t, lon_center_t],
    radius=10,
    color='red',
    popup='Center of the map',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(map_toronto)

# display map
map_toronto

### Display all the Boroughs

In [22]:
df_borough = df_read.groupby(['Borough'],as_index=False).mean()
df_borough = df_borough.reset_index()
df_borough.head()

Unnamed: 0,index,Borough,Latitude,Longitude
0,0,Central Toronto,43.70198,-79.398954
1,1,Downtown Toronto,43.654169,-79.383665
2,2,East Toronto,43.669436,-79.324654
3,3,East York,43.700303,-79.335851
4,4,Etobicoke,43.660043,-79.542074


In [23]:
lat_center_b = df_borough['Latitude'].mean()
lon_center_b = df_borough['Longitude'].mean()

#Draw the map
map_borough = folium.Map(location=[lat_center_b, lon_center_b], tiles='cartodbpositron', zoom_start=11)

# add markers to map
for lat, lng, borough in zip(df_borough['Latitude'], df_borough['Longitude'], df_borough['Borough']):
    label = 'Center of {}'.format(borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.6).add_to(map_borough)
    
# add a red circle marker to represent the Conrad Hotel
folium.features.CircleMarker(
    [lat_center_b, lon_center_b],
    radius=10,
    color='red',
    popup='Center of the map',
    fill = True,
    fill_color = 'red',
    fill_opacity = 0.6
).add_to(map_borough)

# display map
map_borough

## Thankyou Please review