In [6]:
#conda install -c anaconda beautifulsoup4

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/fer/miniconda3

  added / updated specs:
    - beautifulsoup4


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    beautifulsoup4-4.8.2       |           py37_0         161 KB  anaconda
    certifi-2019.11.28         |           py37_0         156 KB  anaconda
    conda-4.8.2                |           py37_0         3.0 MB  anaconda
    soupsieve-1.9.5            |           py37_0          61 KB  anaconda
    ------------------------------------------------------------
                                           Total:         3.4 MB

The following NEW packages will be INSTALLED:

  beautifulsoup4     anaconda/linux-64::beautifulsoup4-4.8.2-py37_0
  soupsieve          anaconda/linux-64::soupsieve-1.9.5-py37_0

The following packages will be UPDATED:

  ca-c

In [10]:
#conda install -c anaconda lxml

Collecting package metadata (current_repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /home/fer/miniconda3

  added / updated specs:
    - lxml


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    libxslt-1.1.33             |       h7d1a2b0_0         577 KB  anaconda
    lxml-4.5.0                 |   py37hefd8a0e_0         1.5 MB  anaconda
    ------------------------------------------------------------
                                           Total:         2.1 MB

The following NEW packages will be INSTALLED:

  libxslt            anaconda/linux-64::libxslt-1.1.33-h7d1a2b0_0
  lxml               anaconda/linux-64::lxml-4.5.0-py37hefd8a0e_0



Downloading and Extracting Packages
lxml-4.5.0           | 1.5 MB    | ##################################### | 100% 
libxslt-1.1.33       | 577 KB    | ##################################### | 100% 
Prepar

In [20]:
import requests
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analsysis
from bs4 import BeautifulSoup


# define the dataframe columns
column_names = ['PostalCode', 'Borough', 'Neighborhood'] 

url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
html_doc = requests.get(url).text
soup = BeautifulSoup(html_doc, 'html.parser')
content = soup.find('table', {'class': 'wikitable sortable'})

content_list = pd.read_html(str(content), skiprows=1)

In [21]:
# instantiate the dataframe
neighborhoods = pd.DataFrame.from_dict(content_list[0])
neighborhoods.columns=column_names

In [22]:
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M2A,Not assigned,Not assigned
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Heights


In [23]:
# Ignore cells with a borough that is Not assigned.
neighborhoods = neighborhoods[neighborhoods.Borough != 'Not assigned']

In [24]:
neighborhoods.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
1,M3A,North York,Parkwoods
2,M4A,North York,Victoria Village
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Heights
5,M6A,North York,Lawrence Manor


In [25]:
# combined into one row the neighborhoods with same postal code area
neighborhoods = neighborhoods.groupby(['PostalCode', 'Borough'])['Neighborhood'].apply(list).apply(lambda x:', '.join(x)).to_frame()
neighborhoods.reset_index()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
...,...,...,...
98,M9N,York,Weston
99,M9P,Etobicoke,Westmount
100,M9R,Etobicoke,"Kingsview Village, Martin Grove Gardens, Richv..."
101,M9V,Etobicoke,"Albion Gardens, Beaumond Heights, Humbergate, ..."


In [26]:
for i, cell in neighborhoods.iterrows():
    if cell['Neighborhood'] == 'Not assigned':
        cell['Neighborhood'] = cell['Borough']

In [27]:
neighborhoods.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Neighborhood
PostalCode,Borough,Unnamed: 2_level_1
M1B,Scarborough,"Rouge, Malvern"
M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae


In [28]:
neighborhoods.shape

(103, 1)

In [29]:
coordinates = pd.read_csv("Geospatial_Coordinates.csv")
neighborhoods = pd.merge(neighborhoods, coordinates, how='inner', left_on='PostalCode', right_on='Postal Code')
neighborhoods

Unnamed: 0,Neighborhood,Postal Code,Latitude,Longitude
0,"Rouge, Malvern",M1B,43.806686,-79.194353
1,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,Woburn,M1G,43.770992,-79.216917
4,Cedarbrae,M1H,43.773136,-79.239476
...,...,...,...,...
98,Weston,M9N,43.706876,-79.518188
99,Westmount,M9P,43.696319,-79.532242
100,"Kingsview Village, Martin Grove Gardens, Richv...",M9R,43.688905,-79.554724
101,"Albion Gardens, Beaumond Heights, Humbergate, ...",M9V,43.739416,-79.588437


In [32]:
#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [33]:
address = 'Toronto'

geolocator = Nominatim(user_agent="toronto_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [38]:
# create map of New York using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(neighborhoods['Latitude'], neighborhoods['Longitude'], neighborhoods['Postal Code'], neighborhoods['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto