## 1. Initial Set-Up/Housekeeping to load needed modules/objects

In [11]:
# Install the third-party packages this notebook imports (lxml is the parser
# handed to BeautifulSoup further down). `--yes` skips conda's confirmation prompt.
!conda install -c anaconda lxml --yes
!conda install -c conda-forge tabulate --yes
!conda install -c anaconda beautifulsoup4 --yes
!conda install -c conda-forge folium=0.5.0 --yes # map rendering library, pinned to 0.5.0


from io import StringIO

import folium # map rendering library
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tabulate import tabulate

print("Initial set-up all complete")

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Solving environment: done


  current version: 4.5.11
  latest version: 4.7.12

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - tabulate


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge

The following packages will be UPDATED:

    certifi: 2019.9.11-py36_0 anaconda --> 2019.9.11-py36_0  conda-forge

The following packages will be DOWNGRADED:

    openssl: 1.1.1-h7b6447c_0 anaconda --> 1.1.1c-h516909a_0 conda-forge


Downloading and Extracting Packages
certifi-2019.9.11  

## 2. Retrieve the data for Toronto and place into a dataframe

In [14]:
# Fetch the Wikipedia page that lists the "M" (Toronto) postal codes.
res = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # do not hang the notebook forever on a stalled connection
)
# Fail loudly on an HTTP error instead of parsing an error page as data.
res.raise_for_status()

# The postal-code table is taken to be the first <table> element on the page.
soup = BeautifulSoup(res.content, 'lxml')
table = soup.find_all('table')[0]

# read_html returns a list of DataFrames (one per table found). The markup is
# wrapped in StringIO because newer pandas releases deprecate literal HTML strings.
tables = pd.read_html(StringIO(str(table)))
df = tables[0]
df.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## 3. Exclude those rows with Borough of "Not assigned" and print the first rows

In [15]:
# Keep only the rows whose Borough is something other than the placeholder.
has_borough = df["Borough"].ne("Not assigned")
df = df.loc[has_borough]
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


## 4. Concatenate the Neighbourhood and/or reassign it to the Borough

In [16]:
# Collapse to one row per postcode: keep the (alphabetically smallest) borough
# and join every neighbourhood name for that postcode with ", ".
df = df.groupby('Postcode').agg({'Borough': 'min', 'Neighbourhood': ', '.join})

# Postcodes whose neighbourhood is still the placeholder take the borough name.
# The count is printed before and after so the replacement can be verified.
unassigned = df['Neighbourhood'].eq("Not assigned")
print(unassigned.sum())
df.loc[unassigned, 'Neighbourhood'] = df.loc[unassigned, 'Borough']
print(df['Neighbourhood'].eq("Not assigned").sum())

# Turn the Postcode index back into an ordinary column.
df = df.reset_index()
df.head()

1
0


Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


## 5. Display the shape of the dataframe

In [17]:
# (rows, columns) of the dataframe at this point in the notebook
df.shape

(103, 3)

## 6. Create a getGeoCode function to help get the Latitude and Longitude values.

In [21]:
# Load the postal-code coordinate lookup from a CSV in the working directory.
geocode = pd.read_csv('Geospatial_Coordinates.csv')

# Non-null count per column.
print(geocode.count())
def getGeoCode(Postcode):
    """Look up Latitude/Longitude in the module-level ``geocode`` table.

    Parameters
    ----------
    Postcode : str or pandas.Series
        A single postal code, or a Series of postal codes.

    Returns
    -------
    pandas.DataFrame
        Columns ``Latitude`` and ``Longitude``, always in that order.

        * Scalar input: the rows of ``geocode`` whose ``Postal Code`` equals
          ``Postcode`` (an empty frame when there is no match).
        * Series input: one row per element of ``Postcode``, carrying the
          input's index so the result aligns with the caller's dataframe.
          Codes missing from ``geocode`` produce NaN coordinates.
    """
    # A list (not a set) keeps the column order deterministic and is the only
    # collection type current pandas accepts as a column indexer.
    coord_cols = ['Latitude', 'Longitude']

    if isinstance(Postcode, pd.Series):
        # Match on the postal-code value rather than on row position, so the
        # lookup does not depend on both frames sharing an index or sort order.
        lookup = geocode.drop_duplicates('Postal Code').set_index('Postal Code')[coord_cols]
        coords = lookup.reindex(Postcode.values)
        coords.index = Postcode.index
        return coords

    return geocode.loc[geocode['Postal Code'] == Postcode, coord_cols]

print("Function Defined")
# Preview the first rows of the coordinate lookup table.
geocode.head()

Postal Code    103
Latitude       103
Longitude      103
dtype: int64
Function Defined


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## 7. Assign Latitude and Longitude for each row in the dataframe

In [26]:
# Attach coordinates by matching on the postal code itself. Keying the lookup
# on 'Postal Code' means this cell does not depend on `df` and `geocode`
# sharing the same index or row order, and it can be re-run safely.
coords_by_postcode = geocode.drop_duplicates('Postal Code').set_index('Postal Code')
df['Latitude'] = df['Postcode'].map(coords_by_postcode['Latitude'])
df['Longitude'] = df['Postcode'].map(coords_by_postcode['Longitude'])

# Spot-check a single postcode.
print(df[df['Postcode']=='M5G'])
df.head()

   Postcode           Borough       Neighbourhood   Latitude  Longitude
57      M5G  Downtown Toronto  Central Bay Street  43.657952 -79.387383


Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## 8. Select only those Boroughs with "Toronto" in the name

In [36]:
# Narrow the dataframe to rows whose borough name contains "Toronto".
in_toronto = df['Borough'].str.contains('Toronto')
df = df.loc[in_toronto]
df.shape

(38, 5)

## 9. Print Map of Toronto with each Borough remaining.

In [38]:
# Initial view of the map: centre point (lat, lng) for Toronto and zoom level.
MAP_CENTER = [43.6532, -79.3832]
map_toronto = folium.Map(location=MAP_CENTER, zoom_start=11)

# One circle marker per dataframe row, with a "neighbourhood, borough" popup.
marker_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood'])
for lat, lng, borough, neighbourhood in marker_rows:
    popup = folium.Popup('{}, {}'.format(neighbourhood, borough), parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_toronto)

# The last expression renders the map in the notebook.
map_toronto

## 10. Observe what the map tells us...
### - It looks like "downtown" has a lot more markers (postcode areas) than the other boroughs.
### - When looking at clustering, it might make sense to use 3 or 4 clusters.