### Get Data from URL

In [1]:
import requests
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
urldata = requests.get(url).text

#print(urldata)



### Parse HTML data and convert into Dataframe

In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

soup = BeautifulSoup(urldata, 'lxml')

tableData= []

table = soup.find('table')

for row in table.find_all('tr'):
    col_data = []
    for column in row.find_all('td'):
        col_data.append(column.text.rstrip())
    tableData.append(col_data)

table_df = pd.DataFrame(data = tableData, columns = ['Postal Code', 'Borough','Neighborhood'])

#table_df

### Data Cleaning

In [3]:
table_df.fillna(np.NaN, inplace=True)

table_df.replace(to_replace='Not assigned', value=np.NaN, inplace=True)

table_df.dropna(axis=0, inplace=True)
table_df.reset_index(drop=True, inplace=True)

table_df.shape

(103, 3)

In [4]:
#table_df[0:5]

table_df.sort_values(by=['Postal Code'], ascending=True, inplace=True)
table_df[0:5]

Unnamed: 0,Postal Code,Borough,Neighborhood
6,M1B,Scarborough,"Malvern, Rouge"
12,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
18,M1E,Scarborough,"Guildwood, Morningside, West Hill"
22,M1G,Scarborough,Woburn
26,M1H,Scarborough,Cedarbrae


### Know how geocoder works

In [58]:
! pip install --user geocoder

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 7.3MB/s ta 0:00:011
[?25hCollecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6


In [None]:
import geocoder # import geocoder

# initialize your variable to None
lat_lng_coords = None

# loop until you get the coordinates
while(lat_lng_coords is None):
  g = geocoder.google('{}, Toronto, Ontario'.format('M5G'))
  lat_lng_coords = g.latlng

latitude = lat_lng_coords[0]
longitude = lat_lng_coords[1]


### Get Latitude & Longitude for each Postal code from CSV

In [5]:
# The code was removed by Watson Studio for sharing.

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Combining both Data Frames

In [6]:
table_df['Latitude'] = df_data_1['Latitude']
table_df['Longitude'] = df_data_1['Longitude']

table_df.reset_index(drop=True, inplace=True)
table_df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.727929,-79.262029
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.7942,-79.262029
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.778517,-79.346556
3,M1G,Scarborough,Woburn,43.77012,-79.408493
4,M1H,Scarborough,Cedarbrae,43.745906,-79.352188


### Create new Data Frame with only boroughs that contain the word Toronto

In [19]:
table_toronto = []
table_df['Borough'].size


table_df.loc[table_df['Borough'] == 'Toronto']

Toronto_df= table_df[table_df['Borough'].str.find('Toronto') != -1]

Toronto_df.reset_index(drop=True, inplace=True)

Toronto_df 
    

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.786947,-79.385975
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.704324,-79.38879
3,M4M,East Toronto,Studio District,43.657162,-79.378937
4,M4N,Central Toronto,Lawrence Park,43.648198,-79.379817
5,M4P,Central Toronto,Davisville North,43.653206,-79.400049
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.693781,-79.428191
7,M4S,Central Toronto,Davisville,43.713756,-79.490074
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.64896,-79.456325
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.636966,-79.615819


### Get library for map visualization 

In [17]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library


Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2020.4.5.1         |   py36h9f0ad1d_0         151 KB  conda-forge
    openssl-1.1.1g             |       h516909a_0         2.1 MB  conda-forge
    altair-4.1.0               |             py_1         614 KB  conda-forge
    python_abi-3.6             |          1_cp36m           4 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    branca-0.4.1               |             py_0          26 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    ca-certificates-2020.4.5.1 |       hecc5488_0         146 KB  conda-forge
    ------------------------------------------------------------
                       

### Get co-ordinates for Toronto 

In [18]:
address = 'Toronto, ON'

geolocator = Nominatim(user_agent="Toronto")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create Map

In [22]:
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, Neighbourhood in zip(Toronto_df['Latitude'], Toronto_df['Longitude'], Toronto_df['Borough'], Toronto_df['Neighborhood']):
    label = '{}, {}'.format(Neighbourhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto