# Segmenting and Clustering Neighborhoods in Toronto

### Import the data

In [1]:
import pandas as pd

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# The first table on the page is the postal-code listing; its first row is the header.
df = pd.read_html(url, header=0)[0]
df.columns = ['Postcode', 'Borough', 'Neighbourhood']

# Keep only postal codes that have a borough assigned. Filtering into a new
# frame (instead of an in-place drop) keeps this cell safe to re-run.
df = df[df['Borough'] != 'Not assigned']

# Collapse to one row per postal code (Postcode becomes the index), joining the
# distinct values of each column with commas. `unique()` keeps first-appearance
# order, so the result is deterministic -- `set()` ordering can vary between runs.
df = df.groupby('Postcode').agg(lambda x: ','.join(x.unique()))

# Where a postal code has no neighbourhood assigned, fall back to its borough.
# `where` does this row by row and aligned on the index.
df['Neighbourhood'] = df['Neighbourhood'].where(
    df['Neighbourhood'] != 'Not assigned', df['Borough']
)

df.shape

(103, 2)

### Retrieve Geospatial data

In [2]:
# Download the CSV of coordinates per postal code
# (its columns include 'Postal Code', 'Latitude' and 'Longitude').
geo_url = "https://cocl.us/Geospatial_data"
geo_data = pd.read_csv(geo_url)
geo_data

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Add latitudes/longitudes to the dataframe

In [3]:
# Look the coordinates up by postal code instead of copying them by row
# position: df is indexed by Postcode, so re-indexing geo_data on the same key
# guarantees every row gets its own coordinates even if the two tables are
# ordered differently. Postal codes missing from geo_data end up as NaN.
coords = geo_data.set_index('Postal Code').reindex(df.index)
df['Latitude'] = coords['Latitude']
df['Longitude'] = coords['Longitude']

df

Unnamed: 0_level_0,Borough,Neighbourhood,Latitude,Longitude
Postcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
M1B,Scarborough,"Rouge,Malvern",43.806686,-79.194353
M1C,Scarborough,"Highland Creek,Rouge Hill,Port Union",43.784535,-79.160497
M1E,Scarborough,"Morningside,Guildwood,West Hill",43.763573,-79.188711
M1G,Scarborough,Woburn,43.770992,-79.216917
M1H,Scarborough,Cedarbrae,43.773136,-79.239476
M1J,Scarborough,Scarborough Village,43.744734,-79.239476
M1K,Scarborough,"Ionview,East Birchmount Park,Kennedy Park",43.727929,-79.262029
M1L,Scarborough,"Oakridge,Clairlea,Golden Mile",43.711112,-79.284577
M1M,Scarborough,"Scarborough Village West,Cliffcrest,Cliffside",43.716316,-79.239476
M1N,Scarborough,"Birch Cliff,Cliffside West",43.692657,-79.264848


### Generate data map

In [5]:

!conda install -c conda-forge folium=0.5.0 --yes
import folium

# Find the center of all the locations and prepare the folium map

center_lat=df.Latitude.mean()
center_long=df.Longitude.mean()

venues_map = folium.Map(location=[center_lat, center_long], zoom_start=13)

df2=df.head(80)
for lat, lng, label in zip(df2.Latitude, df2.Longitude, df2.Borough):
    folium.features.CircleMarker(
            [lat, lng],
            radius=2,
            color='blue',
            popup=label,
            fill = True,
            fill_color='blue',
            fill_opacity=0.6
    ).add_to(venues_map)
venues_map

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2019.6.16          |           py36_1         149 KB  conda-forge
    ca-certificates-2019.6.16  |       hecc5488_0         145 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    altair-3.1.0               |           py36_0         724 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.2 MB

The following NEW packages will be 