# Assignment: Using the Dataframe of Toronto neighbours visualize the neighbours

#### Code part from the  previous assignment

In [1]:
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import requests
import geocoder

#Use the Notebook to build the code to scrape the following Wikipedia page
wiki_link='https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
raw_wiki_page_content = requests.get(wiki_link).text

raw_wiki_page_content_xml = BeautifulSoup(raw_wiki_page_content,'xml')

table = raw_wiki_page_content_xml.find('table')
#print(table.prettify())

Postcode      = []
Borough       = []
Neighbourhood = []

line_number = 1
for tr_cell in table.find_all('tr'):
    if line_number > 1 :
        column_number = 1
        Postcode_temp = None
        Borough_temp = None
        Neighbourhood_temp = None
        for td_cell in tr_cell.find_all('td'):
            if column_number == 1 :
                Postcode_temp = td_cell.text
            elif column_number == 2 :
                Borough_temp = td_cell.text
            else :
                Neighbourhood_temp = td_cell.text
            column_number = column_number + 1
        #Ignore cells with a borough that is Not assigned
        if Borough_temp != 'Not assigned':
            Postcode.append(Postcode_temp.strip())
            Borough.append(Borough_temp.strip())
            Neighbourhood_temp = Neighbourhood_temp.strip()
            if Neighbourhood_temp != 'Not assigned':
                Neighbourhood.append(Neighbourhood_temp)
            else :
                #If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough
                Neighbourhood.append(Borough_temp)
    line_number = line_number + 1
    
toronto_df = pd.DataFrame({"PostalCode":Postcode,"Borough":Borough,"Neighbourhood":Neighbourhood})

toronto_df = toronto_df.groupby('PostalCode').agg({'Borough':'first', 
                             'Neighbourhood': ', '.join}).reset_index()
print("Shape of the Dataframe:"+str(toronto_df.shape))

#fallback list of coordinates from csv
fallback_url = 'http://cocl.us/Geospatial_data'
fallback_coordinates_df = pd.read_csv(fallback_url, delimiter = ',')
fallback_coordinates_df.columns = ['PostalCode', 'Latitude', 'Longitude']

Postcode      = []
Borough       = []
Neighbourhood = []
Latitude = []
Longitude = []
for index, row in toronto_df.iterrows():
    Postcode_temp = row['PostalCode']
    Postcode_temp = Postcode_temp.strip()
    Borough_temp = row['Borough']
    Borough_temp = Borough_temp.strip()
    Neighbourhood_temp = row['Neighbourhood']
    Neighbourhood_temp = Neighbourhood_temp.strip()
    #print('Postcode_temp:'+Postcode_temp)
    lat_lng_coords = None
    try_count = 0
    # try 5 times to get coordinates from internet
    while (lat_lng_coords is None and try_count < 5):
        g = geocoder.google('{}, Toronto, Ontario'.format(Postcode_temp))
        lat_lng_coords = g.latlng
        try_count = try_count + 1
    if lat_lng_coords != None :
        Latitude.append(lat_lng_coords[0])
        Longitude.append(lat_lng_coords[1])
        #print('Lat/Long of ' + Postcode_temp + ' is ' + str(lat_lng_coords[0]) + '/' + str(lat_lng_coords[1])  + ' from net')
    else :
        # if not fetched coordinate from net after 5 times trial, fetch it from fallback csv
        temp_df = fallback_coordinates_df[fallback_coordinates_df.PostalCode == Postcode_temp]
        Latitude.append(temp_df.iloc[0]['Latitude'])
        Longitude.append(temp_df.iloc[0]['Longitude'])
        #print('Lat/Long of ' + Postcode_temp + ' is ' + str(temp_df.iloc[0]['Latitude']) + '/' + str(temp_df.iloc[0]['Longitude']) + ' from csv')
    Postcode.append(Postcode_temp) 
    Borough.append(Borough_temp)
    Neighbourhood.append(Neighbourhood_temp)
    
toronto_df = pd.DataFrame({"PostalCode":Postcode,"Borough":Borough,"Neighbourhood":Neighbourhood,"Latitude":Latitude,"Longitude":Longitude})


Shape of the Dataframe:(103, 3)


# Create Mp of Toronto

#### [ Installed folium using : pip3 install folium ]

In [5]:
import folium

In [7]:
toronto_latitude = 43.6532; toronto_longitude = -79.3832 #From Google search
map_toronto = folium.Map(location = [toronto_latitude, toronto_longitude], zoom_start = 10.7)

# add markers to map
for lat, lng, borough, neighborhood in zip(toronto_df['Latitude'], toronto_df['Longitude'], toronto_df['Borough'], toronto_df['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_toronto)  
    

map_toronto

# Neighbours of North York (selected for analysis)

In [8]:
CLIENT_ID = 'J1JBTE1UXQVN0P0W2TV04LA2HMJ3ZYMOY0QOA42IWYCIG0CW'
CLIENT_SECRET = 'XWM0RWPJN11YUG34VHB1QQI0FSPBJOYE3AJ2RAHCQTIWRDUZ' # Removed manually after use
VERSION = '20190308' 

In [9]:
northyork_data = toronto_df[toronto_df['Borough'] == 'North York'].reset_index(drop=True)
northyork_data.head()


Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M2H,North York,Hillcrest Village,43.803762,-79.363452
1,M2J,North York,"Fairview, Henry Farm, Oriole",43.778517,-79.346556
2,M2K,North York,Bayview Village,43.786947,-79.385975
3,M2L,North York,"Silver Hills, York Mills",43.75749,-79.374714
4,M2M,North York,"Newtonbrook, Willowdale",43.789053,-79.408493


## Create a map of North York and its neighbourhoods


In [10]:
northyork_latitude = 43.7615; northyork_longitude = -79.4111 #From Google search

In [11]:
map_northyork = folium.Map(location=[northyork_latitude, northyork_longitude], zoom_start=12)

# add markers to map
for lat, lng, label in zip(northyork_data['Latitude'], northyork_data['Longitude'], northyork_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_northyork)  
    
map_northyork

## Get a neighbour of North York

In [14]:
neighborhood_latitude = northyork_data.loc[0, 'Latitude'] # neighbourhood latitude value
neighborhood_longitude = northyork_data.loc[0, 'Longitude'] # neighbourhood longitude value

neighborhood_name = northyork_data.loc[0, 'Neighbourhood'] # neighbourhood name

print('Latitude and longitude values of "{}" are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of "Hillcrest Village" are 43.8037622, -79.3634517.


## Get the top 100 venues in the neighborhood 'Hillcrest Village', from North York


In [21]:
LIMIT = 100
radius = 500
url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}'.format(CLIENT_ID, CLIENT_SECRET, northyork_latitude, neighborhood_longitude, VERSION, radius, LIMIT)

In [22]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5c83c0031ed2196e4ab47ef6'},
 'response': {'headerLocation': 'St. Andrew - Windfields',
  'headerFullLocation': 'St. Andrew - Windfields, Toronto',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 7,
  'suggestedBounds': {'ne': {'lat': 43.7660000045, 'lng': -79.3572325833373},
   'sw': {'lat': 43.756999995499996, 'lng': -79.3696708166627}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4ad771f0f964a520900a21e3',
       'name': 'The Keg Steakhouse + Bar',
       'location': {'address': '1977 Leslie Street',
        'lat': 43.75877501086392,
        'lng': -79.36106413332026,
        'labeledLatLngs': [{'label': 'display',
          'lat': 43.75877501086392,
          'lng': -79.36106413332026}],
        'distance': 358

In [23]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [25]:
import json # library to handle JSON files
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

venues = results['response']['groups'][0]['items']  
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,The Keg Steakhouse + Bar,Steakhouse,43.758775,-79.361064
1,DOT Furniture,Furniture / Home Store,43.759104,-79.361162
2,Moatfield Farm Park,Park,43.763798,-79.360314
3,Oriole GO Station,Train Station,43.765601,-79.364523
4,Kids fun city,Arcade,43.759308,-79.358912


In [26]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Get venues for each neighborhood in Scarborough

In [28]:
northyork_venues = getNearbyVenues(names=northyork_data['Neighbourhood'],
                                   latitudes=northyork_data['Latitude'],
                                   longitudes=northyork_data['Longitude']
                                  )

Hillcrest Village
Fairview, Henry Farm, Oriole
Bayview Village
Silver Hills, York Mills
Newtonbrook, Willowdale
Willowdale South
York Mills West
Willowdale West
Parkwoods
Don Mills North
Flemingdon Park, Don Mills South
Bathurst Manor, Downsview North, Wilson Heights
Northwood Park, York University
CFB Toronto, Downsview East
Downsview West
Downsview Central
Downsview Northwest
Victoria Village
Bedford Park, Lawrence Manor East
Lawrence Heights, Lawrence Manor
Glencairn
Maple Leaf Park, North Park, Upwood Park
Humber Summit
Emery, Humberlea


In [29]:
northyork_venues.head()


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Hillcrest Village,43.803762,-79.363452,Eagle's Nest Golf Club,43.805455,-79.364186,Golf Course
1,Hillcrest Village,43.803762,-79.363452,AY Jackson Pool,43.804515,-79.366138,Pool
2,Hillcrest Village,43.803762,-79.363452,Villa Madina,43.801685,-79.363938,Mediterranean Restaurant
3,Hillcrest Village,43.803762,-79.363452,Duncan Creek Park,43.805539,-79.360695,Dog Run
4,Hillcrest Village,43.803762,-79.363452,A.Y. Jackson Secondary School Track,43.805068,-79.366677,Athletics & Sports


In [30]:
northyork_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
"Bathurst Manor, Downsview North, Wilson Heights",18,18,18,18,18,18
Bayview Village,4,4,4,4,4,4
"Bedford Park, Lawrence Manor East",24,24,24,24,24,24
"CFB Toronto, Downsview East",3,3,3,3,3,3
Don Mills North,5,5,5,5,5,5
Downsview Central,4,4,4,4,4,4
Downsview Northwest,5,5,5,5,5,5
Downsview West,4,4,4,4,4,4
"Emery, Humberlea",3,3,3,3,3,3
"Fairview, Henry Farm, Oriole",66,66,66,66,66,66
