# Capstone Project 
# The Battle of Neighborhoods in London: Ethiopian Food

### 1. Let us make all the dependencies ready

In [1]:
import requests
!pip install BeautifulSoup4
!pip install requests
!pip install lxml
import lxml
from lxml import html
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
# import k-means from clustering stage
from sklearn.cluster import KMeans
# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.8.3
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.8.3
  latest version: 4.8.4

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.



### 2. Data Preparation

##### Use Beautiful Soup to requests for Web Scaping

In [2]:
df=pd.DataFrame(columns=['PostalCode','Borough','Neighborhood'])

In [3]:
source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M").text
soup = BeautifulSoup(source, 'lxml')

table = soup.find("table")
table_rows = table.tbody.find_all("tr")

res = []
for tr in table_rows:
    td = tr.find_all("td")
    row = [tr.text for tr in td]
        # Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.
    if row != [] and row[1] != "Not assigned":
        # If a cell has a borough but a "Not assigned" neighborhood, then the neighborhood will be the same as the borough.
        if "Not assigned" in row[2]: 
            row[2] = row[1]
        res.append(row)

In [4]:
# Dataframe with 3 columns
df = pd.DataFrame(res, columns = ["PostalCode", "Borough", "Neighborhood"])
df["Neighborhood"] = df["Neighborhood"].str.replace("\n","")
df["PostalCode"] = df["PostalCode"].str.replace("\n","")
df["Borough"] = df["Borough"].str.replace("\n","")
df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


##### Deal with unassigned and duplicate entries

In [5]:
df=df.drop(df[(df.Borough=="Not assigned")].index)
df=df.drop(df[(df.Neighborhood=="Not assigned")].index)
df2=df.groupby('PostalCode')['Neighborhood'].apply(lambda x: ', '.join(x))
df2=df2.reset_index(drop=False)
df3=pd.merge(df,df2,on='PostalCode')
df3.drop_duplicates(subset='PostalCode',inplace=True)
df3.drop(['Neighborhood_x'],axis=1,inplace=True)
df3.rename(columns={'Neighborhood_y':'Neighborhood'},inplace=True)
df3=df3.reset_index(drop=False)
df3.drop(['index'],axis=1,inplace=True)
df3.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


##### Get the Geographical data

In [6]:
geodf=pd.read_csv('http://cocl.us/Geospatial_data')
geodf.rename(columns={'Postal Code':'PostalCode'},inplace=True)
df_toronto= pd.merge(geodf,df3, on='PostalCode')
df_toronto=df_toronto[['PostalCode','Borough','Neighborhood','Latitude','Longitude']]
address = 'Toronto, ON, Canada'
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
Toronto_data=df_toronto[df_toronto['Borough'].str.contains("Toronto")].reset_index(drop=True)
Toronto_data.head()

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


### 3. Create a map of Toronto with neighborhoods superimposed on top

In [7]:
map_tor = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Borough'], Toronto_data['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_tor)  
    
map_tor

### 4. Prepare for Forsquare call

In [8]:
CLIENT_ID = '2YQXR4Z4BQA2ECTDAQYDAIFIOFT0D1LQNTNAQZBHBT5JL2WZ' # your Foursquare ID
CLIENT_SECRET = 'CLCYVKROZX1UF2LFZU4NQWL2RD0A0K4MKGXDPQURSHEYSHWV' # your Foursquare Secret
VERSION = '20200802' # Foursquare API version
neighborhood_latitude = Toronto_data.loc[0, 'Latitude'] # neighborhood latitude value
neighborhood_longitude = Toronto_data.loc[0, 'Longitude'] # neighborhood longitude value

neighborhood_name = Toronto_data.loc[0, 'Neighborhood'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(neighborhood_name, 
                                                               neighborhood_latitude, 
                                                               neighborhood_longitude))

Latitude and longitude values of The Beaches are 43.67635739999999, -79.2930312.


### 5. Make call to Foursquare API and obtain nearby venues

In [38]:
LIMIT = 5000 # limit of number of venues returned by Foursquare API

radius = 10000 # define radius

 # create URL
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    neighborhood_latitude, 
    neighborhood_longitude, 
    radius, 
    LIMIT)
results = requests.get(url).json()

In [39]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [40]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,name,categories,lat,lng
0,The Fox Theatre,Indie Movie Theater,43.672801,-79.287272
1,Kew Gardens,Park,43.669038,-79.298538
2,Kew-Balmy Beach,Beach,43.667372,-79.295312
3,Woodbine Beach,Beach,43.663112,-79.306374
4,Hollandaise Diner,Breakfast Spot,43.686527,-79.308897


In [41]:
# Get for each neighborhood nearby venues within radius of 10000
def getNearbyVenues(names, latitudes, longitudes, radius=10000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### 6. Process the Venues Data

##### Group venues in a neighborhood

In [42]:
Toronto_venues = getNearbyVenues(names=Toronto_data['Neighborhood'],
                                   latitudes=Toronto_data['Latitude'],
                                   longitudes=Toronto_data['Longitude']
                                  )
Toronto_venues.groupby('Neighborhood').count()
# one hot encoding
Toronto_onehot = pd.get_dummies(Toronto_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Toronto_onehot['Neighborhood'] = Toronto_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [Toronto_onehot.columns[-1]] + list(Toronto_onehot.columns[:-1])
Toronto_onehot =Toronto_onehot[fixed_columns]

# Group by Neighborhood
Toronto_grouped = Toronto_onehot.groupby('Neighborhood').mean().reset_index()

The Beaches
The Danforth West, Riverdale
India Bazaar, The Beaches West
Studio District
Lawrence Park
Davisville North
North Toronto West,  Lawrence Park
Davisville
Moore Park, Summerhill East
Summerhill West, Rathnelly, South Hill, Forest Hill SE, Deer Park
Rosedale
St. James Town, Cabbagetown
Church and Wellesley
Regent Park, Harbourfront
Garden District, Ryerson
St. James Town
Berczy Park
Central Bay Street
Richmond, Adelaide, King
Harbourfront East, Union Station, Toronto Islands
Toronto Dominion Centre, Design Exchange
Commerce Court, Victoria Hotel
Roselawn
Forest Hill North & West, Forest Hill Road Park
The Annex, North Midtown, Yorkville
University of Toronto, Harbord
Kensington Market, Chinatown, Grange Park
CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport
Stn A PO Boxes
First Canadian Place, Underground city
Christie
Dufferin, Dovercourt Village
Little Portugal, Trinity
Brockton, Parkdale Village, Exhibition Place
High

In [43]:
Toronto_venues

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,The Beaches,43.676357,-79.293031,The Fox Theatre,43.672801,-79.287272,Indie Movie Theater
1,The Beaches,43.676357,-79.293031,Kew Gardens,43.669038,-79.298538,Park
2,The Beaches,43.676357,-79.293031,Kew-Balmy Beach,43.667372,-79.295312,Beach
3,The Beaches,43.676357,-79.293031,Woodbine Beach,43.663112,-79.306374,Beach
4,The Beaches,43.676357,-79.293031,Hollandaise Diner,43.686527,-79.308897,Breakfast Spot
...,...,...,...,...,...,...,...
3895,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Union Pearson Express,43.644362,-79.383199,Train Station
3896,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Steam Whistle Brewing,43.641752,-79.387089,Brewery
3897,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Black Camel,43.677016,-79.389367,BBQ Joint
3898,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Art Gallery of Ontario,43.654003,-79.392922,Art Gallery


##### Explore Restruant

In [45]:
Toronto_Venues_only_restaurant = Toronto_venues[Toronto_venues['Venue Category'].str.contains('Restaurant')].reset_index(drop=True)
Toronto_Venues_only_restaurant.index = np.arange(1, len(Toronto_Venues_only_restaurant )+1)
print (Toronto_Venues_only_restaurant['Venue Category'].value_counts())

Japanese Restaurant              99
Restaurant                       84
French Restaurant                59
American Restaurant              50
Mediterranean Restaurant         48
Asian Restaurant                 45
Italian Restaurant               42
Vegetarian / Vegan Restaurant    41
Vietnamese Restaurant            33
Caribbean Restaurant             32
Theme Restaurant                 31
Middle Eastern Restaurant        30
Doner Restaurant                 28
Seafood Restaurant               26
Ramen Restaurant                 26
Mexican Restaurant               23
Spanish Restaurant               22
Indian Restaurant                17
Peruvian Restaurant              16
Tapas Restaurant                 15
Greek Restaurant                 12
Latin American Restaurant         7
Eastern European Restaurant       7
Egyptian Restaurant               5
Ethiopian Restaurant              3
Turkish Restaurant                1
Tibetan Restaurant                1
Name: Venue Category, dtype:

In [46]:
print('There are {} uniques categories.'.format(len(Toronto_Venues_only_restaurant['Venue Category'].unique())))

There are 27 uniques categories.


##### What are the top ten Restaurant types in Toronto?

In [47]:
# create a dataframe of top 10 categories
Toronto_Rest_Venues_Top10 = Toronto_Venues_only_restaurant['Venue Category'].value_counts()[0:10].to_frame(name='Frequency')
Toronto_Rest_Venues_Top10=Toronto_Rest_Venues_Top10.reset_index()
#Tokyo_5_Dist_Venues_Top10

Toronto_Rest_Venues_Top10.rename(index=str, columns={"index": "Venue_Category"}, inplace=True)
Toronto_Rest_Venues_Top10

Unnamed: 0,Venue_Category,Frequency
0,Japanese Restaurant,99
1,Restaurant,84
2,French Restaurant,59
3,American Restaurant,50
4,Mediterranean Restaurant,48
5,Asian Restaurant,45
6,Italian Restaurant,42
7,Vegetarian / Vegan Restaurant,41
8,Vietnamese Restaurant,33
9,Caribbean Restaurant,32


##### Where does the restaurants located?

In [48]:
Toronto_Venues_only_restaurant

Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
1,The Beaches,43.676357,-79.293031,Lake Inez,43.672520,-79.320712,Asian Restaurant
2,The Beaches,43.676357,-79.293031,The Wren,43.682467,-79.328079,American Restaurant
3,The Beaches,43.676357,-79.293031,Maha's Fine Egyptian Cuisine,43.671758,-79.328444,Egyptian Restaurant
4,The Beaches,43.676357,-79.293031,Completo,43.662550,-79.334049,Latin American Restaurant
5,The Beaches,43.676357,-79.293031,Ha Noi 3 Seasons,43.665578,-79.352153,Asian Restaurant
...,...,...,...,...,...,...,...
799,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,The Keg Steakhouse + Bar - Esplanade,43.646712,-79.374768,Restaurant
800,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Kinka Izakaya Original,43.660596,-79.378891,Japanese Restaurant
801,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Miku,43.641374,-79.377531,Japanese Restaurant
802,"Business reply mail Processing Centre, South C...",43.662744,-79.321558,Terroni,43.650927,-79.375602,Italian Restaurant


In [67]:
Toronto_Venues_only_restaurant.groupby('Neighborhood').count().sort_values('Venue', ascending=False)

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Christie,26,26,26,26,26,26
"University of Toronto, Harbord",25,25,25,25,25,25
"The Annex, North Midtown, Yorkville",25,25,25,25,25,25
"Kensington Market, Chinatown, Grange Park",24,24,24,24,24,24
"Richmond, Adelaide, King",24,24,24,24,24,24
"Brockton, Parkdale Village, Exhibition Place",24,24,24,24,24,24
"Little Portugal, Trinity",24,24,24,24,24,24
"CN Tower, King and Spadina, Railway Lands, Harbourfront West, Bathurst Quay, South Niagara, Island airport",24,24,24,24,24,24
"Queen's Park, Ontario Provincial Government",23,23,23,23,23,23
Central Bay Street,23,23,23,23,23,23


In [68]:
Toronto_Venues_only_restaurant.groupby('Venue Category').count().reset_index().sort_values('Venue', ascending=False)


Unnamed: 0,Venue Category,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
11,Japanese Restaurant,99,99,99,99,99,99
18,Restaurant,84,84,84,84,84,84
7,French Restaurant,59,59,59,59,59,59
0,American Restaurant,50,50,50,50,50,50
13,Mediterranean Restaurant,48,48,48,48,48,48
1,Asian Restaurant,45,45,45,45,45,45
10,Italian Restaurant,42,42,42,42,42,42
25,Vegetarian / Vegan Restaurant,41,41,41,41,41,41
26,Vietnamese Restaurant,33,33,33,33,33,33
2,Caribbean Restaurant,32,32,32,32,32,32
