In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import urllib.request, urllib.parse, urllib.error
from urllib.request import urlopen

**Reading wikipedia web-page**

In [3]:
# Wikipedia page listing Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# The context manager closes the HTTP response (and its socket) as soon as
# the body has been read, instead of leaving that to garbage collection.
with urllib.request.urlopen(url) as response:
    html = response.read()

soup = BeautifulSoup(html, "html.parser")

**Creating the dataframe from the table**

In [4]:
table = soup.find_all('table') # Finds all tables in web-page
table2 = table[0] # The first table is the one we need

# Postal code -> [borough, [neighborhood, ...]].
# A dict keeps insertion order, so the final rows appear in the order the
# postal codes are first met on the page, and each lookup is O(1) instead of
# a scan over the whole DataFrame.
postal_codes = {}
for row in table2.find_all('tr'): # Reads each row of the table
    cells = [cell.get_text().strip() for cell in row.find_all('td')]
    if len(cells) < 3:
        # Rows without <td> cells (e.g. the header) or with missing columns
        # cannot be unpacked into code / borough / neighborhood.
        continue
    postal_code, borough, neighborhood = cells[:3]
    if borough == "Not assigned": # Rows without a borough are skipped
        continue
    if neighborhood == "Not assigned": # Fall back to the borough name
        neighborhood = borough
    if postal_code in postal_codes:
        # Known postal code: only the neighborhood is added to its list
        postal_codes[postal_code][1].append(neighborhood)
    else:
        postal_codes[postal_code] = [borough, [neighborhood]]

# Build the DataFrame once; appending row by row is quadratic and
# DataFrame.append no longer exists in pandas >= 2.0.
df = pd.DataFrame(
    [(code, borough, ", ".join(hoods))
     for code, (borough, hoods) in postal_codes.items()],
    columns=['Postal Code', 'Borough', 'Neighborhood'])

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


**Reading the geospatial CSV file (the geocoder package could be used instead, but it did not work for me)**

In [7]:
# Write the postal-code table to the Excel workbook "df.xlsx" (relative path).
df.to_excel("df.xlsx")

In [8]:
# Load the CSV behind this URL into a DataFrame; the next cell reads its
# 'Postal Code', 'Latitude' and 'Longitude' columns.
c = pd.read_csv('https://cocl.us/Geospatial_data')

**Adding the latitudes and longitudes to the dataframe**

In [9]:
# Index the coordinate table by postal code, keeping the first row for any
# repeated code, so each lookup below is a vectorised index match rather than
# a boolean-mask scan per row.
coords = c.drop_duplicates(subset='Postal Code').set_index('Postal Code')

# Postal codes missing from the coordinate table become NaN instead of
# aborting the whole cell with an IndexError.
df['Latitude'] = df['Postal Code'].map(coords['Latitude'])
df['Longitude'] = df['Postal Code'].map(coords['Longitude'])

df.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.65426,-79.360636
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
4,M7A,Queen's Park,Queen's Park,43.662301,-79.389494


In [11]:
# Write the table, now including the Latitude/Longitude columns, to "df01.xlsx".
df.to_excel("df01.xlsx")

**Getting the geographical coordinates of Toronto**

In [6]:
import geocoder
from geopy.geocoders import Nominatim

address = 'Toronto'

geolocator = Nominatim(user_agent="explorer")
location = geolocator.geocode(address)
if location is None:
    # geocode() yields None when nothing matches; fail with a clear message
    # rather than an AttributeError on the attribute access below.
    raise ValueError('No geocoding result for address {!r}'.format(address))

# Map centre used when drawing the folium map.
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.653963, -79.387207.


**Making a map of Toronto and adding Neighborhood markers**

In [7]:
import folium

# Base map centred on the geocoded Toronto coordinates.
map_tor = folium.Map(location=[latitude, longitude], zoom_start=10)

# One circle marker per row of df, with a "<neighborhood>, <borough>" popup.
marker_rows = zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood'])
for lat, lng, borough, neighborhood in marker_rows:
    popup_text = '{}, {}'.format(neighborhood, borough)
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    marker.add_to(map_tor)

map_tor

**Foursquare credentials**

In [8]:
# @hidden_cell
CLIENT_ID = '3B4FDA2F4NEJYTDAEEFFFINB3Z4ELWP3KOQQMVJYN100FEYK' # your Foursquare ID
CLIENT_SECRET = 'QZOMOQMHG4BKQJLYMHHUVOEZACEMCZGNGK2T212BHXCDJYUO' # your Foursquare Secret
VERSION = '20180605'

**Creating a function for getting 30 food venues of a neighborhood**

In [9]:
def getNearbyVenues(names, latitudes, longitudes, radius=500, limit=30, section='food'):
    """Collect venues around each location from the Foursquare explore endpoint.

    Parameters
    ----------
    names, latitudes, longitudes : iterables of equal length
        Label and coordinates of every location to query; they are consumed
        in lockstep with ``zip``.
    radius : int, default 500
        Value sent as the ``radius`` query parameter.
    limit : int, default 30
        Value sent as the ``limit`` query parameter.
    section : str, default 'food'
        Value sent as the ``section`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per returned venue with the columns 'Neighborhood',
        'Neighborhood Latitude', 'Neighborhood Longitude', 'Venue',
        'Venue Latitude', 'Venue Longitude' and 'Venue Category'. The frame
        has these columns even when no venue was found.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.

    Notes
    -----
    Relies on the module-level names ``requests``, ``CLIENT_ID``,
    ``CLIENT_SECRET`` and ``VERSION`` being defined before the call.
    """
    columns = ['Neighborhood',
               'Neighborhood Latitude',
               'Neighborhood Longitude',
               'Venue',
               'Venue Latitude',
               'Venue Longitude',
               'Venue Category']

    rows = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        # Let requests build and encode the query string.
        response = requests.get(
            'https://api.foursquare.com/v2/venues/explore',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'v': VERSION,
                'll': '{},{}'.format(lat, lng),
                'radius': radius,
                'limit': limit,
                'section': section,
            },
            timeout=10,  # never hang the notebook on a stalled connection
        )
        response.raise_for_status()

        # A location with no recommendations may come back without groups.
        groups = response.json().get('response', {}).get('groups') or [{}]

        for item in groups[0].get('items', []):
            venue = item['venue']
            # Some venues carry no category; record None instead of failing.
            categories = venue.get('categories') or []
            rows.append((
                name,
                lat,
                lng,
                venue['name'],
                venue['location']['lat'],
                venue['location']['lng'],
                categories[0]['name'] if categories else None))

    # Passing the column names here keeps the schema stable for empty results.
    return pd.DataFrame(rows, columns=columns)



**Pulling all the venues**

In [10]:
import requests  # has to be in scope before getNearbyVenues performs its HTTP calls

# Query the API once per row of df, using the coordinates joined earlier.
toronto_food_venues = getNearbyVenues(
    df['Neighborhood'],
    df['Latitude'],
    df['Longitude'],
)

toronto_food_venues.head()

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Parkwoods,43.753259,-79.329656,KFC,43.754387,-79.333021,Fast Food Restaurant
1,Parkwoods,43.753259,-79.329656,Bella Vita Catering & Private Chef Service,43.756651,-79.331524,BBQ Joint
2,Victoria Village,43.725882,-79.315572,Portugril,43.725819,-79.312785,Portuguese Restaurant
3,Victoria Village,43.725882,-79.315572,The Frig,43.727051,-79.317418,French Restaurant
4,Victoria Village,43.725882,-79.315572,Latvian Centre Food Market,43.725677,-79.318248,Deli / Bodega


**Dropping duplicate venues**

In [11]:
# A venue returned for several neighborhoods is kept only for the first one,
# identified by its name together with its coordinates.
toronto_food_venues = toronto_food_venues.drop_duplicates(
    subset=['Venue', 'Venue Latitude', 'Venue Longitude'])

**Finding which of them are Greek restaurants**

In [12]:
# Rows whose category is exactly 'Greek Restaurant', renumbered from 0.
is_greek = toronto_food_venues['Venue Category'].eq('Greek Restaurant')
gr_re = toronto_food_venues.loc[is_greek].reset_index(drop=True)
gr_re

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,"Harbourfront, Regent Park",43.65426,-79.360636,Souvlaki Express,43.655584,-79.364438,Greek Restaurant
1,Queen's Park,43.662301,-79.389494,Mr. Souvlaki,43.659412,-79.390558,Greek Restaurant
2,Berczy Park,43.644771,-79.373306,Alexandro's World Famous Gyros,43.641663,-79.375214,Greek Restaurant
3,"Adelaide, King, Richmond",43.650571,-79.384568,Estiatorio Volos,43.650329,-79.384533,Greek Restaurant
4,"Fairview, Henry Farm, Oriole",43.778517,-79.346556,Jimmy The Greek,43.778245,-79.343322,Greek Restaurant
5,East Toronto,43.685347,-79.338106,Command Centre,43.687667,-79.33949,Greek Restaurant
6,"Little Portugal, Trinity",43.647927,-79.41975,Mamakas Taverna,43.646042,-79.419679,Greek Restaurant
7,"The Danforth West, Riverdale",43.679557,-79.352188,Pantheon,43.677621,-79.351434,Greek Restaurant
8,"The Danforth West, Riverdale",43.679557,-79.352188,Mezes,43.677962,-79.350196,Greek Restaurant
9,"The Danforth West, Riverdale",43.679557,-79.352188,Messini Authentic Gyros,43.677827,-79.350569,Greek Restaurant


**How many Greek restaurants there are per neighborhood**

In [13]:
# Number of Greek restaurants found in each neighborhood.
gr = (
    gr_re
    .groupby("Neighborhood")["Venue Category"]
    .count()
    .reset_index(name="Count")
)
gr

Unnamed: 0,Neighborhood,Count
0,"Adelaide, King, Richmond",1
1,"Bedford Park, Lawrence Manor East",1
2,Berczy Park,1
3,Davisville,1
4,East Toronto,1
5,"Fairview, Henry Farm, Oriole",1
6,"Harbourfront, Regent Park",1
7,"Little Portugal, Trinity",1
8,Queen's Park,1
9,"The Danforth West, Riverdale",12


**Getting the neighborhoods that don't have Greek restaurants**

In [14]:
# Keep the rows of df whose neighborhood is absent from the Greek-restaurant counts.
has_greek = df['Neighborhood'].isin(gr['Neighborhood'])
pn = df.loc[~has_greek]
pn.head()

Unnamed: 0,Postal Code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.464763
5,M9A,Etobicoke,Islington Avenue,43.667856,-79.532242
6,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353


**Finding up to 30 different venues per neighborhood**

In [15]:
names = pn['Neighborhood']
latitudes = pn['Latitude']
longitudes = pn['Longitude']

# Credentials and API version shared by every request below.
auth_params = {'client_id': CLIENT_ID, 'client_secret': CLIENT_SECRET, 'v': VERSION}

# Records are collected in a plain list and turned into a DataFrame once at
# the end; growing a DataFrame row by row is quadratic and DataFrame.append
# no longer exists in pandas >= 2.0.
venue_records = []

for name, lat, lng in zip(names, latitudes, longitudes):
    # Search for venues within 200 m of the neighborhood coordinates.
    search = requests.get(
        'https://api.foursquare.com/v2/venues/search',
        params=dict(auth_params, ll='{},{}'.format(lat, lng), radius=200, limit=30),
        timeout=10,
    ).json()

    # Best effort: a neighborhood without a usable answer contributes nothing.
    for found in search.get('response', {}).get('venues', []):
        idn = found['id']

        # The search result does not include likes; fetch the venue details.
        details = requests.get(
            'https://api.foursquare.com/v2/venues/{}'.format(idn),
            params=auth_params,
            timeout=10,
        ).json().get('response', {})

        venue = details.get('venue')
        if not venue:
            # Empty details response: skip this venue.
            continue

        venue_records.append({
            'Latitude': lat,
            'Longitude': lng,
            'Neighborhood': name,
            'Venue ID': idn,
            'Venue Likes': venue.get('likes', {}).get('count', 0)})

# Explicit columns keep the schema stable even if nothing was collected, so
# the following cells can still reference 'Venue ID' and 'Venue Likes'.
toronto_venues = pd.DataFrame(
    venue_records,
    columns=['Latitude', 'Longitude', 'Neighborhood', 'Venue ID', 'Venue Likes'])

toronto_venues.head()

Unnamed: 0,Latitude,Longitude,Neighborhood,Venue ID,Venue Likes
0,43.753259,-79.329656,Parkwoods,4e8d9dcdd5fbbbb6b3003c7b,12.0
1,43.753259,-79.329656,Parkwoods,4e039defd22d4cebf370894a,1.0
2,43.753259,-79.329656,Parkwoods,4d1429d78f4c60fccab8ff1a,2.0
3,43.753259,-79.329656,Parkwoods,51fafa34498ee94bf297e248,1.0
4,43.753259,-79.329656,Parkwoods,4cd43dd34ebba0907ccea8d6,0.0


**Dropping duplicates and finding the neighborhoods with most likes**

In [16]:
# A venue reachable from several neighborhoods is counted once (first occurrence).
toronto_venues = toronto_venues.drop_duplicates(subset=['Venue ID'])

# Total likes of the remaining venues, per neighborhood.
likes_by_neighborhood = toronto_venues.groupby("Neighborhood")["Venue Likes"]
tv = likes_by_neighborhood.sum().reset_index()

**Sorting the neighborhoods by their total number of likes**

In [17]:
# Highest like totals first; tv keeps its original index labels.
tv = tv.sort_values('Venue Likes', ascending=False)

# Shown with a fresh 0..n-1 index; the result is displayed only, not stored.
tv.reset_index(drop=True)

Unnamed: 0,Neighborhood,Venue Likes
0,"Ryerson, Garden District",1062.0
1,St. James Town,553.0
2,"Flemingdon Park, Don Mills South",144.0
3,"Cloverdale, Islington, Martin Grove, Princess ...",108.0
4,"Lawrence Heights, Lawrence Manor",85.0
5,Humewood-Cedarvale,48.0
6,Parkwoods,42.0
7,Victoria Village,42.0
8,"Rouge, Malvern",40.0
9,"Highland Creek, Rouge Hill, Port Union",33.0
