In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [20]:
#Going to be scraping data from the web, so will use Beautiful Soup

import requests
import urllib.request
import time
from bs4 import BeautifulSoup

In [21]:
#Connect to the url
# A timeout stops the cell from hanging indefinitely on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were
# the postal-code table.
response = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
response.raise_for_status()
url = response.text  # NOTE: despite its name, `url` holds the page's HTML text

#Parse HTML and save to BeautifulSoup object
soup = BeautifulSoup(url,'lxml')

In [22]:
# Scrape the first table on the page into a Postcode / Borough / Neighbourhood frame.
table_post = soup.find('table')
if table_post is None:
    raise ValueError('No <table> element was found in the downloaded page')

postcode = []
borough = []
neighbourhood = []

# Walk the table row by row instead of striding through a flat list of cells:
# header rows (which hold no <td>) and any row that does not have exactly three
# cells are skipped, so a single odd row cannot shift every later value into
# the wrong column or run off the end of the list.
for table_row in table_post.find_all('tr'):
    cells = [cell.text.strip() for cell in table_row.find_all('td')]
    if len(cells) != 3:
        continue
    postcode.append(cells[0])
    borough.append(cells[1])
    neighbourhood.append(cells[2])

df = pd.DataFrame({'Postcode': postcode,
                   'Borough': borough,
                   'Neighbourhood': neighbourhood},
                  columns=['Postcode', 'Borough', 'Neighbourhood'])
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront


## Want to remove rows whose borough is "Not assigned" (in the rows shown above, their neighbourhood is "Not assigned" as well)

In [23]:
# Keep only the rows that have a real borough.
# A boolean filter is used instead of `df['Borough'].replace(..., inplace=True)`:
# an in-place method called on a selected column may operate on a temporary
# object (pandas copy-on-write), which would leave `df` itself unchanged.
# Rows whose Borough is already missing are dropped too, as dropna() did before.
has_borough = df['Borough'].notna() & df['Borough'].ne("Not assigned")
df = df.loc[has_borough].copy()  # original index labels are kept

df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [24]:
# Collapse the table to one row per (Postcode, Borough) pair, joining the
# neighbourhood names of each pair into a single comma-separated string.
neighbourhoods_by_code = df.groupby(['Postcode', 'Borough'])['Neighbourhood']
df_new = neighbourhoods_by_code.agg(', '.join).reset_index()
df_new.columns = ['Postcode', 'Borough', 'Neighbourhood']
df_new

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae
5,M1J,Scarborough,Scarborough Village
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park"
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge"
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West"
9,M1N,Scarborough,"Birch Cliff, Cliffside West"


In [25]:
#Can see that for label 93 the neighbourhood is not assigned. Need to replace this with 'Queen's Park'.

# Assign the replaced column back explicitly. Calling replace(..., inplace=True)
# on `df_new['Neighbourhood']` may modify a temporary object rather than
# `df_new` (pandas copy-on-write), silently leaving the value in place.
df_new['Neighbourhood'] = df_new['Neighbourhood'].replace("Not assigned", "Queen's Park")
df_new.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [26]:
# Show the (rows, columns) dimensions of the grouped dataframe.
df_new.shape

(103, 3)

## Will now begin to load the geospatial data 

In [30]:
# Load the per-postcode coordinates and give the three columns the names used
# by the rest of the notebook, so the frame can be joined on 'Postcode'.
geospatial_csv = "http://cocl.us/Geospatial_data"
coords = pd.read_csv(geospatial_csv)
coords.columns = ['Postcode', 'Latitude', 'Longitude']

In [31]:
# Attach latitude/longitude to each postcode row (inner join on 'Postcode').
# This creates the same dataset as the one given in the assignment.
df_new1 = df_new.merge(coords, how='inner', on='Postcode')
df_new1.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


## Now to explore and cluster the neighbourhoods in Toronto.

In [34]:
# Install folium, pinned to version 0.5.0, from the conda-forge channel; --yes skips the confirmation prompt.
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    altair-4.0.1               |             py_0         575 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    folium-0.5.0               |             py_0          45 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.0 MB

The following NEW packages will be 

In [37]:
#Time to import folium for the maps and KMeans for the clustering.

from sklearn.cluster import KMeans
import folium
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [38]:
address = 'Toronto, Canada'

# Nominatim needs an application-specific user agent: calling Nominatim() with
# no arguments is deprecated in older geopy releases and rejected by newer ones.
geolocator = Nominatim(user_agent="toronto_neighbourhood_explorer")
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of the City of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of the City of Toronto are 43.653963, -79.387207.


  app.launch_new_instance()


In [49]:
# Base map centred on the Toronto coordinates printed by the geocoding cell.
toronto_centre = [43.653963, -79.387207]
tor = folium.Map(location=toronto_centre, zoom_start=11)
tor

## Will do what the assignment says and see what neighbourhoods have Toronto in them; first, plot every neighbourhood on the map

In [50]:
# Drop one small circle marker per row of df_new1 onto the map; each marker's
# popup reads "<neighbourhood>, <borough>".
marker_columns = df_new1[['Latitude', 'Longitude', 'Borough', 'Neighbourhood']]
for lat, lng, borough, neighborhood in marker_columns.itertuples(index=False, name=None):
    label = folium.Popup(f'{neighborhood}, {borough}', parse_html=True)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3199cc',
        fill_opacity=0.5,
        parse_html=False)
    marker.add_to(tor)

tor

In [52]:
import json
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

## Will explore the neighbourhood at index 5 (the sixth row, Scarborough Village).

In [53]:
# Take a separate copy of the merged frame to use for the exploration below.
df_tor = df_new1.copy()
df_tor.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [54]:
# Peek at the neighbourhood name stored in the row labelled 0.
df_tor.at[0, 'Neighbourhood']

'Rouge, Malvern'

## Will now explore the latitude and longitude coordinates 

In [56]:
# Read the coordinates and the name of the row labelled 5 in df_tor.
row_label = 5
neighbourhood_latitude, neighbourhood_longitude, neighbourhood_name = (
    df_tor.loc[row_label, column] for column in ('Latitude', 'Longitude', 'Neighbourhood')
)

print(f'Latitude and longitude values of {neighbourhood_name} are {neighbourhood_latitude}, {neighbourhood_longitude}.')

Latitude and longitude values of Scarborough Village are 43.7447342, -79.23947609999999.


#### Now, let's get the top 100 venues that are in Scarborough Village within a radius of 500 meters.

In [None]:
import os

# Foursquare credentials. They are taken from the FOURSQUARE_CLIENT_ID /
# FOURSQUARE_CLIENT_SECRET environment variables when those are set, so real
# keys never have to be typed into (and saved with) the notebook; otherwise the
# placeholder strings below are used, exactly as before.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', 'cleint ID') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', 'Client secret') # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Only a mask of the secret is printed: cell output is stored in the notebook
# file, so echoing the real value would leak it to anyone the file is shared with.
print('CLIENT_SECRET:' + '*' * len(CLIENT_SECRET))

In [58]:
# Build the Foursquare "explore" request for the neighbourhood selected earlier.
LIMIT = 100 # limit of number of venues returned by Foursquare API

radius = 500 # define radius

# create URL
# The coordinates must be read from `neighbourhood_latitude` and
# `neighbourhood_longitude` (British spelling), which are the names this
# notebook assigns; the `neighborhood_...` spellings are never defined, so
# using them raises NameError on a fresh kernel.
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighbourhood_latitude, 
    neighbourhood_longitude, 
    radius, 
    LIMIT)
# NOTE: the displayed URL embeds CLIENT_ID and CLIENT_SECRET; clear this cell's
# output before sharing the notebook.
url

'https://api.foursquare.com/v2/venues/explore?&client_id=UYHTMA0FUKEGJKT3PSM4R5WCNE1IVLANAHLAGBFYI2NRXODL&client_secret=0SPPBUEZPMDVIMMYHU0TVKGNAQJWWS2SN4AXWWA4LQ3KDTUM&v=20180605&ll=43.7447342,-79.23947609999999&radius=500&limit=100'

### Send the GET request and examine the results

In [59]:
# Send the request built above and decode the JSON body of the reply.
response = requests.get(url)
results = response.json()
results

{'meta': {'code': 200, 'requestId': '5e3880bc0be7b4001b367d5c'},
  'headerLocation': 'Eglinton East',
  'headerFullLocation': 'Eglinton East, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 1,
  'suggestedBounds': {'ne': {'lat': 43.749234204500006,
    'lng': -79.23325872538938},
   'sw': {'lat': 43.7402341955, 'lng': -79.2456934746106}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '5150a8dae4b045dfb6581c85',
       'name': 'McCowan Park',
       'location': {'lat': 43.74508851212816,
        'lng': -79.239335687338,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.74508851212816,
          'lng': -79.239335687338}],
        'distance': 41,
        'cc': 'CA',
        'country': 'Canada',
        'formattedAddress': ['Cana

In [60]:
# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of a venue's first category, or None if it has none.

    Parameters
    ----------
    row : mapping-like (for example a dict or a pandas Series)
        The category list is read from ``row['categories']``; when that key
        is absent, ``row['venue.categories']`` is used instead.

    Returns
    -------
    The ``'name'`` entry of the first category, or None when the category
    list is empty or None.

    Raises
    ------
    KeyError
        If neither key is present in ``row``.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; a bare `except:` would also
        # hide unrelated errors.
        categories_list = row['venue.categories']

    # Treat a missing (None) list the same as an empty one rather than
    # crashing on len(None).
    if categories_list is None or len(categories_list) == 0:
        return None
    return categories_list[0]['name']

### Clear json and structure into a pandas dataframe

In [61]:
# Flatten the venue records of the first result group into a table.
venues = results['response']['groups'][0]['items']
nearby_venues = json_normalize(venues)

# Keep just the venue name, its categories and its coordinates.
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# Reduce each row's category list to the name of its first category.
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# Keep only the last dotted component of each column name,
# e.g. 'venue.location.lat' -> 'lat'.
nearby_venues = nearby_venues.rename(columns=lambda col: col.rsplit(".", 1)[-1])

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,McCowan Park,Playground,43.745089,-79.239336


### As can be seen above, the only venue within 500 metres of Scarborough Village is a playground.

In [63]:
# Report how many venue rows the query produced.
print(f'{len(nearby_venues)} venues were returned by Foursquare.')

1 venues were returned by Foursquare.
