# The Battle of Neighborhoods: Where to open a new Italian Restaurant in Amsterdam?
Author: Francesco Chiossi

Import the needed libraries

In [1]:
import requests
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library
import matplotlib.cm as cm
import matplotlib.colors as colors

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge


# Getting Quarters data
The city of Amsterdam provides a list of Quarters with geospatial coordinates.
https://maps.amsterdam.nl/open_geodata/

Let's load it into a pandas data frame

In [2]:
ams_quarters = pd.read_csv("https://maps.amsterdam.nl/open_geodata/excel.php?KAARTLAAG=GEBIED_BUURTCOMBINATIES_EXWATER&THEMA=gebiedsindeling", sep=';')

In [3]:
ams_quarters.head()

Unnamed: 0,OBJECTNUMMER,Buurtcombinatie_code,Buurtcombinatie,Stadsdeel_code,Opp_m2,COORDS,LNG,LAT,Unnamed: 8
0,1,T92,Amstel III/Bullewijk,T,5273610,"POLYGON((4.971842 52.284355,4.970005 52.286362...",49505918,52298741,
1,2,T93,"Bijlmer Centrum (D,F,H)",T,3158100,"POLYGON((4.971421 52.307989,4.965179 52.314811...",49545269,523153044,
2,3,T97,Gein,T,1921430,"POLYGON((5.007954 52.301543,5.005928 52.301548...",499393105,5229550995,
3,4,E18,Kinkerbuurt,E,273871,"POLYGON((4.870809 52.366843,4.868665 52.366263...",486544515,5236808265,
4,5,E21,Overtoomse Sluis,E,308628,"POLYGON((4.865539 52.360287,4.864693 52.360093...",48598361,523597247,


In [4]:
ams_quarters.shape

(99, 9)

In [5]:
ams_quarters.columns

Index(['OBJECTNUMMER', 'Buurtcombinatie_code', 'Buurtcombinatie',
       'Stadsdeel_code', 'Opp_m2', 'COORDS', 'LNG', 'LAT', 'Unnamed: 8'],
      dtype='object')

In [6]:
ams_quarters.dtypes

OBJECTNUMMER              int64
Buurtcombinatie_code     object
Buurtcombinatie          object
Stadsdeel_code           object
Opp_m2                    int64
COORDS                   object
LNG                      object
LAT                      object
Unnamed: 8              float64
dtype: object

Drop the columns we don't need from the data frame

In [7]:
ams_quarters = ams_quarters.drop(['OBJECTNUMMER', 'Buurtcombinatie_code', 'Stadsdeel_code', 'Opp_m2', 'COORDS', 'Unnamed: 8'], axis=1)

Replace comma with dots in the coordinates

In [8]:
ams_quarters = ams_quarters.stack().str.replace(',','.').unstack()

We now have the list of quarters of Amsterdam and their coordinates.

In [9]:
ams_quarters.head()

Unnamed: 0,Buurtcombinatie,LNG,LAT
0,Amstel III/Bullewijk,4.9505918,52.298741
1,Bijlmer Centrum (D.F.H),4.9545269,52.3153044
2,Gein,4.99393105,52.29550995
3,Kinkerbuurt,4.86544515,52.36808265
4,Overtoomse Sluis,4.8598361,52.3597247


In [10]:
ams_quarters.columns

Index(['Buurtcombinatie', 'LNG', 'LAT'], dtype='object')

Let's rename the columns

In [11]:
ams_quarters.columns = ['Neighborhood', 'Longitude', 'Latitude'] #Fix columns names to match

# Loading venues data from Foursquare

Variables for the function

In [12]:
CLIENT_ID = 'XXX' # your Foursquare ID
CLIENT_SECRET = 'XXX' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
radius = 500
LIMIT = 100

Function to get the top venues in the Neighbourhood that are in the  within a radius for a given section

In [13]:
def getNearbyVenues(names, latitudes, longitudes, section, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&section={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT,
            section)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Let's call the function for the Quarters of Amsterdam and store the results in a data frame.
We are limiting the search to only food venues.

In [14]:
amsterdam_venues = getNearbyVenues(names=ams_quarters['Neighborhood'],
                                   latitudes=ams_quarters['Latitude'],
                                   longitudes=ams_quarters['Longitude'],
                                   section='food'
                                  )

Amstel III/Bullewijk
Bijlmer Centrum (D.F.H)
Gein
Kinkerbuurt
Overtoomse Sluis
Erasmuspark
Hoofdweg e.o.
De Weteringschans
Oostelijke Eilanden/Kadijken
Volewijck
Tuindorp Nieuwendam
Tuindorp Buiksloot
Kadoelen
Elzenhagen
Burgwallen-Oude Zijde
Bijlmer Oost (E.G.K)
Driemond
Haarlemmerbuurt
Osdorp-Oost
De Punt
Westelijk Havengebied
Centrale Markt
Westlandgracht
Zuidas
Slotermeer-Noordoost
Geuzenveld
IJburg West
Betondorp
Transvaalbuurt
Indische Buurt Oost
Nieuwe Pijp
Rijnbuurt
Da Costabuurt
Helmersbuurt
Staatsliedenbuurt
Geuzenbuurt
Sloterdijk
Burgwallen-Nieuwe Zijde
Westindische Buurt
Houthavens
Eendracht
Slotermeer-Zuidwest
Osdorp-Midden
Slotervaart Zuid
Oude Pijp
Schinkelbuurt
Apollobuurt
IJselbuurt
Buitenveldert-West
Oosterparkbuurt
Indische Buurt West
Zeeburgereiland/Nieuwe Diep
Nieuwendammerdijk/Buiksloterdijk
Tuindorp Oostzaan
Nellestein
Frederik Hendrikbuurt
Grachtengordel-West
Spaarndammer- en Zeeheldenbuurt
Van Lennepbuurt
Nieuwmarkt/Lastage
De Kolenkit
Slotervaart Noord
Lutkeme

Let's check the size of the resulting dataframe

In [15]:
print(amsterdam_venues.shape)
amsterdam_venues.head()

(2401, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Amstel III/Bullewijk,52.298741,4.9505918,La Place,52.297268,4.947243,Restaurant
1,Amstel III/Bullewijk,52.298741,4.9505918,McDonald's Nederland HQ,52.297649,4.951005,Fast Food Restaurant
2,Amstel III/Bullewijk,52.298741,4.9505918,IKEA Restaurant,52.302151,4.949925,Scandinavian Restaurant
3,Amstel III/Bullewijk,52.298741,4.9505918,Grand-Cafe ZO,52.295557,4.951118,Restaurant
4,Amstel III/Bullewijk,52.298741,4.9505918,Cafeteria Bugaboo,52.299327,4.952841,Sandwich Place


Let's check how many venues were returned for each neighborhood

In [16]:
amsterdam_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Amstel III/Bullewijk,8,8,8,8,8,8
Apollobuurt,12,12,12,12,12,12
Banne Buiksloot,5,5,5,5,5,5
Bedrijventerrein Sloterdijk,4,4,4,4,4,4
Betondorp,3,3,3,3,3,3
Bijlmer Centrum (D.F.H),43,43,43,43,43,43
Bijlmer Oost (E.G.K),9,9,9,9,9,9
Buikslotermeer,18,18,18,18,18,18
Buitenveldert-Oost,6,6,6,6,6,6
Buitenveldert-West,1,1,1,1,1,1


List Venue Categories sorted by number of occurrences

In [17]:
amsterdam_venues.groupby('Venue Category').count().sort_values('Neighborhood', ascending=False)

Unnamed: 0_level_0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude
Venue Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Café,231,231,231,231,231,231
Restaurant,218,218,218,218,218,218
Italian Restaurant,162,162,162,162,162,162
Bakery,154,154,154,154,154,154
Pizza Place,100,100,100,100,100,100
Snack Place,82,82,82,82,82,82
Sandwich Place,73,73,73,73,73,73
French Restaurant,73,73,73,73,73,73
Chinese Restaurant,67,67,67,67,67,67
Breakfast Spot,62,62,62,62,62,62


# Analyze Each Quarter

In [18]:
# one hot encoding
amsterdam_onehot = pd.get_dummies(amsterdam_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
amsterdam_onehot['Neighborhood'] = amsterdam_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [amsterdam_onehot.columns[-1]] + list(amsterdam_onehot.columns[:-1])
amsterdam_onehot = amsterdam_onehot[fixed_columns]

amsterdam_onehot.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Amstel III/Bullewijk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Amstel III/Bullewijk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Amstel III/Bullewijk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Amstel III/Bullewijk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Amstel III/Bullewijk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [19]:
amsterdam_grouped = amsterdam_onehot.groupby('Neighborhood').mean().reset_index()
amsterdam_grouped

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Amstel III/Bullewijk,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,Apollobuurt,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.083333,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,Banne Buiksloot,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.200000,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,0.000000
3,Bedrijventerrein Sloterdijk,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,Betondorp,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.333333,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
5,Bijlmer Centrum (D.F.H),0.0,0.000000,0.000000,0.000000,0.046512,0.000000,0.000000,0.000000,0.139535,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.023256,0.023256,0.000000
6,Bijlmer Oost (E.G.K),0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.222222,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.111111,0.000000,0.000000
7,Buikslotermeer,0.0,0.000000,0.000000,0.000000,0.055556,0.000000,0.000000,0.000000,0.166667,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,Buitenveldert-Oost,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,Buitenveldert-West,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.00,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


Let's print each neighborhood along with the top 5 most common venues

In [20]:
num_top_venues = 5

for hood in amsterdam_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = amsterdam_grouped[amsterdam_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Amstel III/Bullewijk----
                     venue  freq
0     Fast Food Restaurant  0.38
1               Restaurant  0.25
2           Sandwich Place  0.12
3  Scandinavian Restaurant  0.12
4           Breakfast Spot  0.12


----Apollobuurt----
                venue  freq
0          Restaurant  0.17
1          Donut Shop  0.08
2              Bakery  0.08
3                Café  0.08
4  Chinese Restaurant  0.08


----Banne Buiksloot----
                venue  freq
0         Pizza Place   0.2
1  Turkish Restaurant   0.2
2          Restaurant   0.2
3              Bakery   0.2
4                Café   0.2


----Bedrijventerrein Sloterdijk----
                 venue  freq
0           Restaurant  0.25
1  Fried Chicken Joint  0.25
2       Sandwich Place  0.25
3          Snack Place  0.25
4    Afghan Restaurant  0.00


----Betondorp----
                      venue  freq
0                Restaurant  0.33
1                    Bakery  0.33
2                      Café  0.33
3         Afghan Rest

Let's put that into a pandas dataframe
First, let's write a function to sort the venues in descending order.

In [21]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Now let's create the new dataframe and display the top 10 venues for each neighborhood.

In [22]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = amsterdam_grouped['Neighborhood']

for ind in np.arange(amsterdam_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(amsterdam_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Amstel III/Bullewijk,Fast Food Restaurant,Restaurant,Breakfast Spot,Sandwich Place,Scandinavian Restaurant,Ethiopian Restaurant,Dim Sum Restaurant,Diner,Doner Restaurant,Donut Shop
1,Apollobuurt,Restaurant,Gastropub,Bakery,Donut Shop,Chinese Restaurant,Café,Italian Restaurant,Bistro,Steakhouse,Breakfast Spot
2,Banne Buiksloot,Turkish Restaurant,Pizza Place,Restaurant,Bakery,Café,Vietnamese Restaurant,Ethiopian Restaurant,Dim Sum Restaurant,Diner,Doner Restaurant
3,Bedrijventerrein Sloterdijk,Fried Chicken Joint,Restaurant,Sandwich Place,Snack Place,Vietnamese Restaurant,Empanada Restaurant,Deli / Bodega,Dim Sum Restaurant,Diner,Doner Restaurant
4,Betondorp,Restaurant,Café,Bakery,Vietnamese Restaurant,Falafel Restaurant,Diner,Doner Restaurant,Donut Shop,Dumpling Restaurant,Dutch Restaurant
5,Bijlmer Centrum (D.F.H),Bakery,Chinese Restaurant,Sandwich Place,Fast Food Restaurant,Restaurant,Cajun / Creole Restaurant,Deli / Bodega,Café,Asian Restaurant,Italian Restaurant
6,Bijlmer Oost (E.G.K),Bakery,Chinese Restaurant,Middle Eastern Restaurant,Burger Joint,Snack Place,Fast Food Restaurant,South American Restaurant,Turkish Restaurant,Food Court,Fried Chicken Joint
7,Buikslotermeer,Snack Place,Bakery,Chinese Restaurant,Fast Food Restaurant,Diner,Café,Restaurant,Sandwich Place,Seafood Restaurant,Breakfast Spot
8,Buitenveldert-Oost,Pizza Place,French Restaurant,Sandwich Place,Fish & Chips Shop,Cafeteria,Creperie,Vietnamese Restaurant,Eastern European Restaurant,Dim Sum Restaurant,Diner
9,Buitenveldert-West,Gastropub,Vietnamese Restaurant,Falafel Restaurant,Dim Sum Restaurant,Diner,Doner Restaurant,Donut Shop,Dumpling Restaurant,Dutch Restaurant,Eastern European Restaurant


# Analyze the data for Italian Restaurants

Let's count how many food venues for each category are in each amsterdam quarter

In [23]:
amsterdam_restaurants_count=amsterdam_onehot.groupby('Neighborhood').sum().reset_index()
amsterdam_restaurants_count.head()

Unnamed: 0,Neighborhood,Afghan Restaurant,African Restaurant,American Restaurant,Argentinian Restaurant,Asian Restaurant,Australian Restaurant,BBQ Joint,Bagel Shop,Bakery,...,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Thai Restaurant,Tibetan Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Amstel III/Bullewijk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Apollobuurt,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,Banne Buiksloot,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
3,Bedrijventerrein Sloterdijk,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Betondorp,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


Let's now focus only on Italian Restaurants and sort the results in descending order so we can see which Quarter has the most

In [24]:
amsterdam_italian_restaurant_count = amsterdam_restaurants_count[['Neighborhood','Italian Restaurant']].sort_values('Italian Restaurant', ascending=False)
amsterdam_italian_restaurant_count.head()

Unnamed: 0,Neighborhood,Italian Restaurant
27,Grachtengordel-Zuid,14
43,Kinkerbuurt,9
11,Burgwallen-Oude Zijde,9
26,Grachtengordel-West,8
41,Jordaan,8


Let's count the total number of food venues for each Quarter

In [25]:
amsterdam_food_venues_count = amsterdam_venues[['Neighborhood', 'Venue']].groupby('Neighborhood').count().reset_index()
amsterdam_food_venues_count.columns=['Neighborhood', 'Total Food Venues']
amsterdam_food_venues_count.head()

Unnamed: 0,Neighborhood,Total Food Venues
0,Amstel III/Bullewijk,8
1,Apollobuurt,12
2,Banne Buiksloot,5
3,Bedrijventerrein Sloterdijk,4
4,Betondorp,3


Let's merge the 2 data frames so that we have for each Quarter the number of italian restaurants and the total number of food venues

In [26]:
amsterdam_merged = amsterdam_italian_restaurant_count.join(amsterdam_food_venues_count.set_index('Neighborhood'), on='Neighborhood', how='inner')
amsterdam_merged.head()

Unnamed: 0,Neighborhood,Italian Restaurant,Total Food Venues
27,Grachtengordel-Zuid,14,100
43,Kinkerbuurt,9,89
11,Burgwallen-Oude Zijde,9,100
26,Grachtengordel-West,8,88
41,Jordaan,8,86


Let's calculate what's the percentage of Italian Restaurants over the total for each quarter and add it as a new cloumn in the data frame

In [27]:
amsterdam_merged['Italian Restaurant Ratio'] = amsterdam_merged['Italian Restaurant']/amsterdam_merged['Total Food Venues']
amsterdam_merged.head()

Unnamed: 0,Neighborhood,Italian Restaurant,Total Food Venues,Italian Restaurant Ratio
27,Grachtengordel-Zuid,14,100,0.14
43,Kinkerbuurt,9,89,0.101124
11,Burgwallen-Oude Zijde,9,100,0.09
26,Grachtengordel-West,8,88,0.090909
41,Jordaan,8,86,0.093023


Let's merge the data frame with the one having the coordinates of the quarters

In [28]:
amsterdam_merged = amsterdam_merged.join(ams_quarters.set_index('Neighborhood'), on='Neighborhood', how='inner')
amsterdam_merged.head()

Unnamed: 0,Neighborhood,Italian Restaurant,Total Food Venues,Italian Restaurant Ratio,Longitude,Latitude
27,Grachtengordel-Zuid,14,100,0.14,4.8924792,52.36503665
43,Kinkerbuurt,9,89,0.101124,4.86544515,52.36808265
11,Burgwallen-Oude Zijde,9,100,0.09,4.89746545,52.37228075
26,Grachtengordel-West,8,88,0.090909,4.8882184,52.3733336
41,Jordaan,8,86,0.093023,4.881443,52.37453715


## Data Visualization

Prepare a map of Amsterdam to visulalize the data

In [29]:
#Amsterdam coordinates
latitude = 52.3680
longitude = 4.9036

# create map
map_ams_italian = folium.Map(location=[latitude, longitude], zoom_start=13)


Add blue round markers to the map for each quarter, the size of the circle is proportional to the number of Italian restaurants in the quarter

In [30]:
# add markers to the map with cirles having size proportional to the number of italian restaurants in the quarter

for lat, lon, poi, res_count in zip(amsterdam_merged['Latitude'], amsterdam_merged['Longitude'], amsterdam_merged['Neighborhood'], amsterdam_merged['Italian Restaurant']):
    label = folium.Popup(str(poi) +"- Italian Restaurants: "+str(res_count), parse_html=True)
    folium.CircleMarker(
        [float(lat), float(lon)],
        radius=res_count,
        popup=label,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.7).add_to(map_ams_italian)
    
map_ams_italian

From the map we can see that the Italian Restaurants are concentrated around the city center, a to a lwer extend on the west side.

Let's visualize also the total number of restaurants.
Add red circle markers to the map to show the total number of food venues in the quarter, the size of the circle is proportional to the total number of food venues in the quarter

In [31]:
for lat, lon, poi, res_count in zip(amsterdam_merged['Latitude'], amsterdam_merged['Longitude'], amsterdam_merged['Neighborhood'], amsterdam_merged['Total Food Venues']):
    label = folium.Popup(str(poi) +"- Total Food Venues: "+str(res_count), parse_html=True)
    folium.CircleMarker(
        [float(lat), float(lon)],
        radius=res_count/2,
        popup=label,
        color='red',
        fill=False).add_to(map_ams_italian)
    
map_ams_italian

We see a similar distribution for  the total number of restaurants.


## Finding the Best Location

An interesting place to open a new Italian Restaurant would be a place with many food venues, since it indicates there's a lot of demand, but few other Italian Restaurants, so that a new one won't face to heavy competion and there's market for it.

From the map there are 2 locations that jums to the eye:
- Oude Pijp with 5 Italian Restaurants and more than 100 food venues
- Van Lennepbuurt with 4 Italian Restaurants and more than 100 food venues

Let's check the data by getting the top 10 places with the most food venues:

In [32]:
amsterdam_top10_n_venues = amsterdam_merged.sort_values('Total Food Venues', ascending=False).head(10)
amsterdam_italian_restaurant_ratio = amsterdam_top10_n_venues.sort_values('Italian Restaurant Ratio', ascending=True)
amsterdam_italian_restaurant_ratio

Unnamed: 0,Neighborhood,Italian Restaurant,Total Food Venues,Italian Restaurant Ratio,Longitude,Latitude
82,Van Lennepbuurt,4,100,0.04,4.8674922,52.3650118
61,Oude Pijp,5,100,0.05,4.89589905,52.35621705
10,Burgwallen-Nieuwe Zijde,7,100,0.07,4.8961625,52.3740584
14,Da Costabuurt,5,71,0.070423,4.87211155,52.3701159
49,Nieuwe Pijp,6,72,0.083333,4.89706495,52.35201325
11,Burgwallen-Oude Zijde,9,100,0.09,4.89746545,52.37228075
26,Grachtengordel-West,8,88,0.090909,4.8882184,52.3733336
41,Jordaan,8,86,0.093023,4.881443,52.37453715
43,Kinkerbuurt,9,89,0.101124,4.86544515,52.36808265
27,Grachtengordel-Zuid,14,100,0.14,4.8924792,52.36503665


This confirms that Van Lennepbuurt and Oude Pijp are the ones with the lowest Italian Restaurant Ratio amont the top 10 quartes with the most food venues.

To get an idea of the type of competitors let's see what are the Most Common Venues in these 2 quarters 

In [33]:
neighborhoods_venues_sorted.loc[neighborhoods_venues_sorted['Neighborhood'] == 'Van Lennepbuurt']

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
82,Van Lennepbuurt,Café,Restaurant,Italian Restaurant,Breakfast Spot,Bakery,Japanese Restaurant,Burger Joint,Mexican Restaurant,Moroccan Restaurant,Asian Restaurant


In [34]:
neighborhoods_venues_sorted.loc[neighborhoods_venues_sorted['Neighborhood'] == 'Oude Pijp']

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
61,Oude Pijp,Café,Italian Restaurant,French Restaurant,Pizza Place,Bakery,South American Restaurant,Deli / Bodega,Vegetarian / Vegan Restaurant,Seafood Restaurant,Japanese Restaurant


We see that in both places Italian Restaurants are among the most common food venues, indicating that the type of venues in these quarters are very varied.

To further improve this analysis we would need data from additional data sources, for example on the population or the real estate value of the buildings