# IBM Data Science Professional Certificate Week 3 #

## Peer-graded Assignment: Segmenting and Clustering Neighborhoods in Toronto ##

## Use the Notebook to build the code to scrape the following Wikipedia page, https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M, in order to obtain the data that is in the table of postal codes and to transform the data into a pandas DataFrame ##

In [1]:
#Import Libraries
import urllib.request
from io import StringIO

import bs4 as bs
import lxml.html as lh
import numpy as np
import pandas as pd
import requests

In [None]:
#Getting the data from url
url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
# A timeout keeps the cell from hanging forever if the server never answers.
res = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
res.raise_for_status()

soup = bs.BeautifulSoup(res.content,'lxml')
# The first <table> element on the page is the one that gets parsed.
table = soup.find_all('table')[0]
# Passing literal HTML to read_html is deprecated; wrap it in a file-like buffer.
df = pd.read_html(StringIO(str(table)))
# read_html returns a list of DataFrames; its first entry is already the parsed
# table, so no to_json/read_json round-trip is needed.
data = df[0]

In [None]:
# Preview the first 10 rows of the scraped table
data.head(10)

In [None]:
# Keep only the rows whose Borough is something other than 'Not assigned'
df_selected = data.loc[data['Borough'].ne('Not assigned')]

In [None]:
# Show the rows that remain after dropping the 'Not assigned' boroughs
df_selected

In [None]:
# One row per (Postal Code, Borough) pair; the remaining column values of each
# group are concatenated into a single comma-separated string.
df_selected_gp2 = (
    df_selected
    .groupby(['Postal Code', 'Borough'], as_index=False)
    .agg(','.join)
)

In [None]:
# Show the frame grouped by postal code and borough
df_selected_gp2

In [None]:
#Replacing values in Neighbourhood field with Borough where Neighbourhood is not assigned
# np.where picks Borough where the condition holds and keeps the existing
# Neighbourhood otherwise. The result is written back to df_selected_gp2, the
# same frame the condition is computed from.
df_selected_gp2['Neighbourhood'] = np.where(
    df_selected_gp2['Neighbourhood'] == 'Not assigned',
    df_selected_gp2['Borough'],
    df_selected_gp2['Neighbourhood'],
)

In [None]:
# Shape of the grouped frame as a (rows, columns) tuple
df_selected_gp2.shape

## Latitude and longitude for each postal code, loaded from the CSV at http://cocl.us/Geospatial_data (used here instead of the Geocoder Python package) ##

In [None]:
# Read the CSV behind this URL straight into a DataFrame
geo_url = "http://cocl.us/Geospatial_data"
geo_data = pd.read_csv(filepath_or_buffer=geo_url)

In [None]:
# Shape of the coordinates table as a (rows, columns) tuple
geo_data.shape

In [None]:
# Relabel the three columns so the key column is named 'Postal Code',
# the same name the merge with the grouped frame joins on.
geo_data.columns = [
    'Postal Code',
    'Latitude',
    'Longitude',
]

In [None]:
# Show the current column labels of geo_data
geo_data.columns

In [None]:
# Join the grouped postal-code frame with its coordinates on 'Postal Code'
# (inner join, which is the pandas default).
merged_data2 = df_selected_gp2.merge(geo_data, how='inner', on='Postal Code')

In [None]:
# Show the merged frame (grouped postal codes plus Latitude/Longitude)
merged_data2

## Q3 Explore and cluster the neighborhoods in Toronto ## 

In [None]:
!pip install geopy

In [None]:
!pip install folium

In [None]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analysis
# Show every column and every row when a DataFrame is displayed.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
# json_normalize is exposed at the pandas top level since pandas 1.0; the old
# pandas.io.json location is deprecated and fails to import on recent releases.
# Fall back to the old path only for pandas versions that predate the move.
try:
    from pandas import json_normalize # transform JSON file into a pandas dataframe
except ImportError:
    from pandas.io.json import json_normalize

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

In [None]:
# Report the number of distinct Borough values and the number of rows in the
# merged frame (the second figure is the row count of merged_data2).
borough_count = len(merged_data2['Borough'].unique())
row_count = len(merged_data2)
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

In [None]:
# Look up the coordinates used to centre the map.
address = 'Toronto, TO'

geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; stop with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geographical coordinates of Toronto City are {}, {}.'.format(latitude, longitude))

## Create a map of Toronto with neighborhoods superimposed on top. ##

In [None]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map: one circle per row of merged_data2
for lat, lng, borough, neighborhood in zip(merged_data2['Latitude'], merged_data2['Longitude'], merged_data2['Borough'], merged_data2['Neighbourhood']):
    # The popup text is "<neighbourhood(s)>, <borough>" for this row. It uses the
    # per-row neighborhood value, not the whole DataFrame.
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True makes folium treat the label as plain text rather than markup.
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)

# last expression of the cell, so the notebook renders the map
map_toronto