# Cuisine Vs Location

This note book contains the code that supports the report - Cuisine Vs Location

Installing all the libraries

In [1]:
!conda install -c conda-forge beautifulsoup4 --yes
!conda install -c conda-forge lxml --yes
!conda install -c conda-forge requests --yes
!conda install -c conda-forge geocoder --yes
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following packages will be UPDATED:

    beautifulsoup4: 4.6.0-py35h442a8c9_1 --> 4.6.3-py35_0 conda-forge

beautifulsoup4 100% |################################| Time: 0:00:00  37.36 MB/s
Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following packages will be UPDATED:

    libxml2: 2.9.4-h6b072ca_5     --> 2.9.8-h422b904_2     conda-forge
    libxslt: 1.1.29-hcf9102b_5    --> 1.1.32-h88dbc4e_2    conda-forge
    lxml:    4.1.0-py35ha401a81_0 --> 4.2.5-py35hc9114bc_0 conda-forge

libxml2-2.9.8- 100% |################################| Time: 0:00:00  15.27 MB/s
libxslt-1.1.32 100% |################################| Time: 0:00:00  70.00 MB/s
lxml-4.2.5-py3 100% |################################| Time: 0:00:00  24.48 MB/s
Fetchin

Importing the necessary modules from each of the libraries

In [2]:
# importing necessary modules
import requests
import pandas as pd
import numpy as np
import lxml
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium 

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

from sklearn.cluster import KMeans
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize
from geopy.geocoders import Nominatim

Reading the postal code information from Wikipedia page

In [3]:
# url for the wikipedia page
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
source = requests.get(url).text

# cretaing a beautifulsoup obect
soup = BeautifulSoup(source,'lxml')

# creating & loading data into dataframe
pcode_df = pd.DataFrame()
i = 0
j = 0
table_pcode = soup.find('table')
for table_data in table_pcode.find_all('td'):
    col_data = table_data.text
    pcode_df.loc[i,j] = col_data
    j = j + 1
    if (j==3):
        i = i + 1
        j = 0

Modifying the data frame so that it can be used further in the code

In [4]:
# adding column names to the data frame
pcode_df.columns = ('PostalCode','Borough','Neighborhood')

# removing the \n haracter from the end of the string
pcode_df['Neighborhood'] = pcode_df['Neighborhood'].str.slice(stop = -1)

# removing the rows with borough value as not assigned
pcode_df=pcode_df[pcode_df.Borough != 'Not assigned']
pcode_df = pcode_df.reset_index(drop=True)

Concatenating the neighborhood values and also modifying the resulting value

In [5]:
# concatenating neighborhoods with same Postal Code
new_df = pd.DataFrame()
new_df = pcode_df.groupby('PostalCode').agg({'Neighborhood':lambda x: tuple(x)})
new_df.reset_index(inplace=True)

# converting the tuple into a string data type
new_df['Neighborhood'] = new_df['Neighborhood'].astype(str)
#removing the end and begenning braces and the single quotes
new_df['Neighborhood'] = new_df['Neighborhood'].str.slice(stop = -2)
new_df['Neighborhood'] = new_df['Neighborhood'].str.slice(start = 2)
new_df['Neighborhood'] = new_df['Neighborhood'].str.replace("'","")

Creating a data frame with unique postal code and borough information

In [6]:
df_borough = pcode_df
df_borough = df_borough.drop('Neighborhood',axis=1)
df_borough.drop_duplicates(inplace=True)

Creation of the final data frame by combining two intermediate data frames using Postal code as key

In [7]:
final_pcode_df = pd.DataFrame()
final_pcode_df = df_borough.merge(new_df, on='PostalCode')

Updating the data for rows missing Neighborhood values

In [8]:
loc_Neigh_NA = final_pcode_df.Neighborhood == 'Not assigned'
final_pcode_df.loc[loc_Neigh_NA,'Neighborhood'] = final_pcode_df.loc[loc_Neigh_NA,'Borough']
final_pcode_df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [9]:
# shape of the final dataframe
print('Shape of the Final DataFrame',final_pcode_df.shape)

Shape of the Final DataFrame (103, 3)


Reading the csv file with latitude and longitude value, modifying the column names and merge with the postal code data frame

In [10]:
# reading the csv file with latitude and longitude value
lat_long_df = pd.read_csv('http://cocl.us/Geospatial_data')
# modify column names inorder to perform the merge
lat_long_df.columns = ('PostalCode','Latitude','Longitude')
# merging data to create the new data set with latitude and longitude data
final_lat_long_df = final_pcode_df.merge(lat_long_df , on='PostalCode')
final_lat_long_df.head(2)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572


In [11]:
final_lat_long_df.shape

(103, 5)

Initializing Foursquare Credentials

In [13]:
# The code was removed by Watson Studio for sharing.

Creating a function to retrive venue information from Forsquare 

In [14]:
def getNearbyVenues(names, latitudes, longitudes,search_query='restaurant',radius=500):
    
    neigh_venues_list=pd.DataFrame(columns=('Neighborhood','Neigh_Latitude','Neigh_Longitude','ID','Venue','Venue_Latitude', \
                  'Venue_Longitude','Venue_Category'))
    j = 0
    if (neigh_venues_list.shape[0] == 0):
        i = 0
    else:
        i = neigh_venues_list.shape[0]
    for name, lat, lng in zip(names, latitudes, longitudes):
        # create the API request URL
        url ='https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&query={}&radius={}&limit={}'.format(            
            CLIENT_ID,
            CLIENT_SECRET,            
            lat, 
            lng,
            VERSION,
            search_query,
            radius, 
            50)
        # make the GET request
        results = requests.get(url).json()["response"]['venues']   
        # store the relevant information for each nearby venue in adate frame
        for v in results:
            try:
                neigh_venues_list.loc[i,'ID'] = v['id']
                neigh_venues_list.loc[i,'Venue'] = v['name']
                neigh_venues_list.loc[i,'Venue_Latitude'] = v['location']['lat']
                neigh_venues_list.loc[i,'Venue_Longitude'] = v['location']['lng'] 
                neigh_venues_list.loc[i,'Venue_Category'] = v['categories'][0]['name']
            except:  
                neigh_venues_list.loc[i,'ID'] = np.nan
                neigh_venues_list.loc[i,'Venue'] = np.nan
                neigh_venues_list.loc[i,'Venue_Latitude'] = np.nan
                neigh_venues_list.loc[i,'Venue_Longitude'] = np.nan
                neigh_venues_list.loc[i,'Venue_Category'] = np.nan
            neigh_venues_list.loc[i,'Neighborhood'] = names[j]
            neigh_venues_list.loc[i,'Neigh_Latitude'] = latitudes[j]
            neigh_venues_list.loc[i,'Neigh_Longitude'] = longitudes[j]
            i = i + 1
        j = j + 1
    # return the data frame
    return(neigh_venues_list)

Creating and storing the venues information returned by the user defined funtion

In [15]:
toronto_venues = pd.DataFrame()
toronto_venues = getNearbyVenues(names=final_lat_long_df['Neighborhood'],
                                   latitudes=final_lat_long_df['Latitude'],
                                   longitudes=final_lat_long_df['Longitude']
                                  )
toronto_venues.head(2)

Unnamed: 0,Neighborhood,Neigh_Latitude,Neigh_Longitude,ID,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,"Harbourfront, Regent Park",43.6543,-79.3606,4f872713e4b0abaa00979185,Site Of Great Canary Restaurant,43.6533,-79.3579,Breakfast Spot
1,"Harbourfront, Regent Park",43.6543,-79.3606,5b9e897ac9a5170039679335,Ryan Restaurant,43.6557,-79.3641,Ethiopian Restaurant


Dropping any neighborhoods that do not have any restaurant

In [16]:
toronto_venues.dropna(axis=0,how='any',inplace=True)
toronto_venues.reset_index(drop=True,inplace=True)
print(toronto_venues.shape)
toronto_venues.head()

(559, 8)


Unnamed: 0,Neighborhood,Neigh_Latitude,Neigh_Longitude,ID,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,"Harbourfront, Regent Park",43.6543,-79.3606,4f872713e4b0abaa00979185,Site Of Great Canary Restaurant,43.6533,-79.3579,Breakfast Spot
1,"Harbourfront, Regent Park",43.6543,-79.3606,5b9e897ac9a5170039679335,Ryan Restaurant,43.6557,-79.3641,Ethiopian Restaurant
2,"Harbourfront, Regent Park",43.6543,-79.3606,4ae5b91ff964a520a6a121e3,Morning Glory Cafe,43.6539,-79.3611,Breakfast Spot
3,"Harbourfront, Regent Park",43.6543,-79.3606,5bf9f9f7135b39002cd82007,Weheliye Restaurant,43.6585,-79.3657,African Restaurant
4,"Harbourfront, Regent Park",43.6543,-79.3606,4ac3e6cef964a520629d20e3,Archeo,43.6507,-79.3594,Italian Restaurant


List out the unique restaurant categories

In [17]:
toronto_venues['Venue_Category'].unique()

array(['Breakfast Spot', 'Ethiopian Restaurant', 'African Restaurant',
       'Italian Restaurant', 'Thai Restaurant', 'Vietnamese Restaurant',
       'Diner', 'Fast Food Restaurant', 'Sushi Restaurant', 'Pub',
       'Sandwich Place', 'American Restaurant', 'Restaurant',
       'Chinese Restaurant', 'Asian Restaurant', 'Dim Sum Restaurant',
       'Lounge', 'Theme Restaurant', 'Indian Restaurant',
       'Modern European Restaurant', 'Caribbean Restaurant',
       'New American Restaurant', 'Gastropub', 'Japanese Restaurant',
       'Middle Eastern Restaurant', 'Movie Theater',
       'Molecular Gastronomy Restaurant', 'Spanish Restaurant', 'Bar',
       'Mongolian Restaurant', 'Food', 'Beer Bar', 'French Restaurant',
       'Szechuan Restaurant', 'Nightclub', 'Hakka Restaurant',
       'Greek Restaurant', 'Portuguese Restaurant', 'Café',
       'Eastern European Restaurant', 'Wine Bar', 'Deli / Bodega',
       'Brewery', 'Hotel', 'Steakhouse', 'Cuban Restaurant',
       'Mexican Rest

Reviewing unique categories to get more insight

In [18]:
toronto_venues[toronto_venues['Venue_Category'] == 'Food'].head(15)
toronto_venues[toronto_venues['Venue_Category'] == 'Hakka Restaurant'].head(15)
toronto_venues[toronto_venues['Venue_Category'] == 'Asian'].head(15)
toronto_venues[toronto_venues['Venue_Category'] == 'Restaurant'].head(15)
toronto_venues[toronto_venues['Venue_Category'] == 'Noodle House'].head(15)

Unnamed: 0,Neighborhood,Neigh_Latitude,Neigh_Longitude,ID,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
410,"Chinatown, Grange Park, Kensington Market",43.6532,-79.4,4b266f05f964a520657b24e3,Goldstone Noodle Restaurant 金石,43.6523,-79.398,Noodle House


Dropping the word restaurant from the category type value for ease of use

In [19]:
toronto_venues['Venue_Category'] = toronto_venues['Venue_Category'].str.replace(" Restaurant","")
toronto_venues['Venue_Category'].unique()

array(['Breakfast Spot', 'Ethiopian', 'African', 'Italian', 'Thai',
       'Vietnamese', 'Diner', 'Fast Food', 'Sushi', 'Pub',
       'Sandwich Place', 'American', 'Restaurant', 'Chinese', 'Asian',
       'Dim Sum', 'Lounge', 'Theme', 'Indian', 'Modern European',
       'Caribbean', 'New American', 'Gastropub', 'Japanese',
       'Middle Eastern', 'Movie Theater', 'Molecular Gastronomy',
       'Spanish', 'Bar', 'Mongolian', 'Food', 'Beer Bar', 'French',
       'Szechuan', 'Nightclub', 'Hakka', 'Greek', 'Portuguese', 'Café',
       'Eastern European', 'Wine Bar', 'Deli / Bodega', 'Brewery', 'Hotel',
       'Steakhouse', 'Cuban', 'Mexican', 'Mediterranean', 'Nightlife Spot',
       'Korean', 'Music Venue', 'General Entertainment', 'Latin American',
       'Noodle House', 'Vegetarian / Vegan', 'Dumpling', 'Cantonese',
       'Furniture / Home Store', 'Tapas', 'Pizza Place'], dtype=object)

A FOR loop to group the venue categories into venues category group

In [20]:
for i in range(0,toronto_venues.shape[0]):
    if (toronto_venues.loc[i,'Venue_Category'] in ('Chinese','Korean','Thai','Asian','Japanese','Sushi', \
                                                   'Dim Sum','Hakka','Vietnamese','Mongolian','Szechuan', \
                                                  'Cantonese','Dumpling','Noodle House')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'East Asian'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('Indian')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'South Asian'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('Ethiopian','African',)):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'African'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('American','New American','Steakhouse','Diner')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'North American'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('Mexican','Latin American')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'Latin American'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('Bar','Nightclub','Wine Bar','Beer Bar','Gastropub', \
                                                        'Pub','Brewery','Nightlife Spot','Lounge')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'Beverage Bar'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('Italian','Pizza Place','Spanish','Tapas', \
                                                        'French','Portuguese','Modern European','Eastern European')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'European'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('Middle Eastern','Greek','Mediterranean')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'Mediterranean'
    elif (toronto_venues.loc[i,'Venue_Category'] in ('Caribbean','Cuban')):
        toronto_venues.loc[i,'Venue_Cat_Group'] = 'Caribbean'
    else:
        toronto_venues.loc[i,'Venue_Cat_Group'] = (toronto_venues.loc[i,'Venue_Category'])

In [21]:
toronto_venues['Venue_Cat_Group'].unique()

array(['Breakfast Spot', 'African', 'European', 'East Asian',
       'North American', 'Fast Food', 'Beverage Bar', 'Sandwich Place',
       'Restaurant', 'Theme', 'South Asian', 'Caribbean', 'Mediterranean',
       'Movie Theater', 'Molecular Gastronomy', 'Food', 'Café',
       'Deli / Bodega', 'Hotel', 'Latin American', 'Music Venue',
       'General Entertainment', 'Vegetarian / Vegan',
       'Furniture / Home Store'], dtype=object)

Deleting rows that either are not true culinary establishments e.g. Music Venue, Movie Tehater and do not have any specific categorization e.g. - Food, Restaurant

In [22]:
delet_cat_group = ('Restaurant','General Entertainment','Hotel','Food','Deli / Bodega', \
                   'Movie Theater','Music Venue','Furniture / Home Store')
toronto_venues = toronto_venues[~toronto_venues.Venue_Cat_Group.isin(delet_cat_group)]

In [23]:
toronto_venues.reset_index(drop=True,inplace=True)
toronto_venues['Venue_Cat_Group'].unique()

array(['Breakfast Spot', 'African', 'European', 'East Asian',
       'North American', 'Fast Food', 'Beverage Bar', 'Sandwich Place',
       'Theme', 'South Asian', 'Caribbean', 'Mediterranean',
       'Molecular Gastronomy', 'Café', 'Latin American',
       'Vegetarian / Vegan'], dtype=object)

toronto_venues.shape

In [25]:
toronto_venues.columns

Index(['Neighborhood', 'Neigh_Latitude', 'Neigh_Longitude', 'ID', 'Venue',
       'Venue_Latitude', 'Venue_Longitude', 'Venue_Category',
       'Venue_Cat_Group'],
      dtype='object')

Creating a new data froame with just the venue information and dropping all the duplicate records

In [26]:
toronto_venues_no_dup = pd.DataFrame
toronto_venues_no_dup = toronto_venues.copy()
toronto_venues_no_dup.drop(['Neighborhood','Neigh_Latitude','Neigh_Longitude'],axis=1,inplace=True)
toronto_venues_no_dup.drop_duplicates(inplace=True)
toronto_venues_no_dup.reset_index(drop=True,inplace=True)
print(toronto_venues_no_dup.shape)
toronto_venues_no_dup.head(2)

(262, 6)


Unnamed: 0,ID,Venue,Venue_Latitude,Venue_Longitude,Venue_Category,Venue_Cat_Group
0,4f872713e4b0abaa00979185,Site Of Great Canary Restaurant,43.6533,-79.3579,Breakfast Spot,Breakfast Spot
1,5b9e897ac9a5170039679335,Ryan Restaurant,43.6557,-79.3641,Ethiopian,African


Adding a new column to store the ratings value

In [27]:
toronto_venues_no_dup['Venue_Rating'] = np.nan
toronto_venues_no_dup.columns

Index(['ID', 'Venue', 'Venue_Latitude', 'Venue_Longitude', 'Venue_Category',
       'Venue_Cat_Group', 'Venue_Rating'],
      dtype='object')

In [28]:
toronto_venues_no_dup.shape

(262, 7)

Copying the data into a new data frame for further processing and preserving the original data frame

In [29]:
toronto_venues_1 = toronto_venues_no_dup.copy()
toronto_venues_1.head(2)

Unnamed: 0,ID,Venue,Venue_Latitude,Venue_Longitude,Venue_Category,Venue_Cat_Group,Venue_Rating
0,4f872713e4b0abaa00979185,Site Of Great Canary Restaurant,43.6533,-79.3579,Breakfast Spot,Breakfast Spot,
1,5b9e897ac9a5170039679335,Ryan Restaurant,43.6557,-79.3641,Ethiopian,African,


In [30]:
toronto_venues_1.shape

(262, 7)

Creating user defined function to get the ratings value

In [31]:
def getVenueRatings(venue_id):
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        venue_id, CLIENT_ID, CLIENT_SECRET, VERSION)
    results = requests.get(url).json()
    try:
        venue_rating = results['response']['venue']['rating']  
    except:
        venue_rating = 0
    return(venue_rating)

Using a for loop to get hte ratings for all rows in the input data frame

In [32]:
for i in range(0,toronto_venues_1.shape[0]):
    toronto_venues_1.loc[i,'Venue_Rating'] = getVenueRatings(toronto_venues_1.loc[i,'ID'])

toronto_venues_1.head(3)

Unnamed: 0,ID,Venue,Venue_Latitude,Venue_Longitude,Venue_Category,Venue_Cat_Group,Venue_Rating
0,4f872713e4b0abaa00979185,Site Of Great Canary Restaurant,43.6533,-79.3579,Breakfast Spot,Breakfast Spot,0.0
1,5b9e897ac9a5170039679335,Ryan Restaurant,43.6557,-79.3641,Ethiopian,African,0.0
2,4ae5b91ff964a520a6a121e3,Morning Glory Cafe,43.6539,-79.3611,Breakfast Spot,Breakfast Spot,7.9


In [33]:
toronto_venues_1['Venue_Rating'].unique()

array([ 0. ,  7.9,  8.1,  5.5,  7.6,  6.1,  5.4,  5.8,  6.3,  6.6,  8.2,
        7.7,  7.2,  6.2,  8.6,  7. ,  5.1,  8.9,  6.4,  6.8,  5.6,  7.8,
        7.3,  5.7,  7.5,  6. ,  8. ,  8.4,  5.2,  7.1,  8.3,  4.7,  6.7,
        7.4,  5.9,  8.7,  6.5,  6.9,  9.1,  9.2])

Creating a data frame to store only rows with non-zero ratings and also grouping the rows based on vanue category group type. Aggregate function of count is used on the Venue column and Sum on the Ratings column

In [34]:
toront_venue_1_rate_nonzero = pd.DataFrame()
toront_venue_1_rate_nonzero = toronto_venues_1[toronto_venues_1['Venue_Rating'] != 0]
toront_venue_1_rate_mean = toront_venue_1_rate_nonzero.groupby('Venue_Cat_Group').agg({'Venue':'count','Venue_Rating':'sum'})
toront_venue_1_rate_mean.sort_values('Venue_Rating',ascending =False,inplace=True)
toront_venue_1_rate_mean.head()

Unnamed: 0_level_0,Venue,Venue_Rating
Venue_Cat_Group,Unnamed: 1_level_1,Unnamed: 2_level_1
East Asian,64,439.8
European,16,119.0
North American,15,110.8
Beverage Bar,14,102.2
Sandwich Place,14,86.7


Weighted Average of the ratings is calculated and added to the data frame

In [35]:
toront_venue_1_rate_mean['Avg_Rating'] = (toront_venue_1_rate_mean['Venue_Rating']  * \
                                          toront_venue_1_rate_mean['Venue']) / sum(toront_venue_1_rate_mean['Venue'])

The data frame is sorted in the descending order of the weightes average ratings

In [36]:
toront_venue_1_rate_mean.sort_values('Avg_Rating',ascending=False, inplace=True)
toront_venue_1_rate_mean.head()

Unnamed: 0_level_0,Venue,Venue_Rating,Avg_Rating
Venue_Cat_Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
East Asian,64,439.8,182.774026
European,16,119.0,12.363636
North American,15,110.8,10.792208
Beverage Bar,14,102.2,9.290909
Sandwich Place,14,86.7,7.881818


In [37]:
toront_venue_1_rate_mean.reset_index(inplace=True)
toront_venue_1_rate_mean.head()

Unnamed: 0,Venue_Cat_Group,Venue,Venue_Rating,Avg_Rating
0,East Asian,64,439.8,182.774026
1,European,16,119.0,12.363636
2,North American,15,110.8,10.792208
3,Beverage Bar,14,102.2,9.290909
4,Sandwich Place,14,86.7,7.881818


Creating a new data frame with just the neighborhod name and venue category group information and dropping any duplicates

In [38]:
toronto_neigh_cat = pd.DataFrame()
toronto_neigh_cat = toronto_venues.drop(['Neigh_Latitude','Neigh_Longitude','ID', \
                                           'Venue', 'Venue_Latitude', 'Venue_Longitude', \
                                         'Venue_Category'],axis=1).copy()
toronto_neigh_cat_2 = pd.DataFrame()
toronto_neigh_cat_2 = toronto_neigh_cat.drop_duplicates().copy()
toronto_neigh_cat_2.reset_index(inplace=True)
toronto_neigh_cat_2.drop('index',axis=1,inplace = True)
toronto_neigh_cat_2.head()

Unnamed: 0,Neighborhood,Venue_Cat_Group
0,"Harbourfront, Regent Park",Breakfast Spot
1,"Harbourfront, Regent Park",African
2,"Harbourfront, Regent Park",European
3,"Harbourfront, Regent Park",East Asian
4,"Lawrence Heights, Lawrence Manor",East Asian


Creating a new data frame by grouping the rows of previous data frame on the neighborhood value and concatenating the venue category type value

In [39]:
toronto_neigh_cat_3 = toronto_neigh_cat_2.groupby('Neighborhood')['Venue_Cat_Group'].apply(tuple)
toronto_neigh_cat_3 = pd.DataFrame(toronto_neigh_cat_3)
toronto_neigh_cat_3.reset_index(inplace=True)
toronto_neigh_cat_3['Venue_Cat_Group'] = toronto_neigh_cat_3['Venue_Cat_Group'].astype(str)
toronto_neigh_cat_3['Venue_Cat_Group'] = toronto_neigh_cat_3['Venue_Cat_Group'].str.slice(stop = -2)
toronto_neigh_cat_3['Venue_Cat_Group'] = toronto_neigh_cat_3['Venue_Cat_Group'].str.slice(start = 2)
toronto_neigh_cat_3['Venue_Cat_Group'] = toronto_neigh_cat_3['Venue_Cat_Group'].str.replace("'","")
toronto_neigh_cat_3.head()

Unnamed: 0,Neighborhood,Venue_Cat_Group
0,"Adelaide, King, Richmond","North American, South Asian, East Asian, Bever..."
1,Agincourt,"East Asian, Sandwich Place"
2,"Agincourt North, ""LAmoreaux East"", Milliken, S...",East Asian
3,"Alderwood, Long Branch","European, East Asian"
4,"Bathurst Manor, Downsview North, Wilson Heights","Mediterranean, Sandwich Place"


Creating a user defined function to find the next venue category group type

In [40]:
def getNextRestaurant(toronto_neigh_cat_3):
    next_rest = pd.DataFrame(columns=('Neighborhood','Next_Rest_Cat_Group'))
    i = 0
    for i in range(0,toronto_neigh_cat_3.shape[0]):
        next_rest.loc[i,'Neighborhood'] = toronto_neigh_cat_3.loc[i,'Neighborhood']
        j = 0
        for j in range(0,toront_venue_1_rate_mean.shape[0]):
            if (toront_venue_1_rate_mean.loc[j,'Venue_Cat_Group'] in toronto_neigh_cat_3.loc[i,'Venue_Cat_Group']):
                j = j + 1
            else:
                next_rest.loc[i,'Next_Rest_Cat_Group'] = toront_venue_1_rate_mean.loc[j,'Venue_Cat_Group']
                break
        i = i + 1
    
    return(next_rest)      


Calling the function by passing the data frame as an argument

In [41]:
next_rest = getNextRestaurant(toronto_neigh_cat_3)

In [42]:
next_rest.head(20)

Unnamed: 0,Neighborhood,Next_Rest_Cat_Group
0,"Adelaide, King, Richmond",Molecular Gastronomy
1,Agincourt,European
2,"Agincourt North, ""LAmoreaux East"", Milliken, S...",European
3,"Alderwood, Long Branch",North American
4,"Bathurst Manor, Downsview North, Wilson Heights",East Asian
5,"Bedford Park, Lawrence Manor East",North American
6,Berczy Park,European
7,"Brockton, Exhibition Place, Parkdale Village",European
8,"Cabbagetown, St. James Town",European
9,Caledonia-Fairbanks,East Asian


In [43]:
next_rest.shape

(56, 2)

Grouping the resultant data frame based on the next reataurant category group value to get the location suited to open a restaurant of a particular type

In [44]:
next_loc = next_rest.groupby('Next_Rest_Cat_Group')['Neighborhood'].apply(tuple)
next_loc = pd.DataFrame(next_loc)
next_loc.reset_index(inplace=True)
next_loc['Neighborhood'] = next_loc['Neighborhood'].astype(str)
next_loc['Neighborhood'] = next_loc['Neighborhood'].str.slice(stop = -2)
next_loc['Neighborhood'] = next_loc['Neighborhood'].str.slice(start = 2)
next_loc['Neighborhood'] = next_loc['Neighborhood'].str.replace("'","")
next_loc.head(25)

Unnamed: 0,Next_Rest_Cat_Group,Neighborhood
0,Beverage Bar,"Central Bay Street, Little Portugal, Trinity"
1,Breakfast Spot,Church and Wellesley
2,East Asian,"Bathurst Manor, Downsview North, Wilson Height..."
3,European,"Agincourt, Agincourt North, ""LAmoreaux East"", ..."
4,Mediterranean,"Commerce Court, Victoria Hotel"
5,Molecular Gastronomy,"Adelaide, King, Richmond, Ryerson, Garden Dist..."
6,North American,"Alderwood, Long Branch, Bedford Park, Lawrence..."
7,Sandwich Place,"St. James Town, Stn A PO Boxes 25 The Esplanade"
8,Vegetarian / Vegan,"Design Exchange, Toronto Dominion Centre, Firs..."
