# Finding Investment opportunities in Amsterdam


Import necessary libraries

In [5]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim
from urllib.request import urlopen
from pandas.io.json import json_normalize

import matplotlib.cm as cm
import matplotlib.colors as colors

from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium

Fetching package metadata .............
Solving package specifications: .

# All requested packages already installed.
# packages in environment at /opt/conda/envs/DSX-Python35:
#
folium                    0.5.0                      py_0    conda-forge


Data obtained from Amsterdam City council's map data:
https://maps.amsterdam.nl/open_geodata/
The data was ';' separated and decimals are represented by commas.
Dutch column names were replaced to English.

In [7]:

geo = pd.read_csv("https://raw.githubusercontent.com/chatterjeesd/Coursera_Capstone/master/GEBIED_BUURTEN.csv" , sep = ';', decimal=",")
df = pd.DataFrame(geo, columns=['Buurt_code','Buurt','LAT', 'LNG'])
df = df.rename(columns={'Buurt_code': 'Code', 'Buurt': 'Neighborhood', 'LAT': 'Latitude', 'LNG': 'Longitude' })
df[['Latitude', 'Longitude']] = df[['Latitude', 'Longitude']].astype(float)
df.head()

Unnamed: 0,Code,Neighborhood,Latitude,Longitude
0,F81d,Calandlaan/Lelylaan,52.355708,4.809697
1,F81e,Osdorp Zuidoost,52.353736,4.811344
2,F82a,Osdorp Midden Noord,52.362078,4.791792
3,F82b,Osdorp Midden Zuid,52.358838,4.793781
4,F82c,Zuidwestkwadrant Osdorp Noord,52.355523,4.795597


Use geopy library to get the latitude and longitude values of Amsterdam

In [6]:
address = 'Amsterdam'

geolocator = Nominatim()
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Amsterdam are {}, {}.'.format(latitude, longitude))

The geograpical coordinates of Amsterdam are 52.3745403, 4.89797550561798.


Create a map of Amsterdam with neighborhoods.

In [8]:
# create map of Amsterdam using latitude and longitude values
map_ams = folium.Map(location=[latitude, longitude], zoom_start=10)
# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Code'], df['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        ).add_to(map_ams)
    
map_ams

In [9]:
CLIENT_ID = 'EWCWF0RLYWIFWUKA5ABEWJJMNYRJVZW0YYWGKYLLAFWQEKYU' # your Foursquare ID
CLIENT_SECRET = 'T3YYAKNFDU2QMNLSPUN1WFMSRUR2N3IAICXLMXSHXB2TZTT0' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: EWCWF0RLYWIFWUKA5ABEWJJMNYRJVZW0YYWGKYLLAFWQEKYU
CLIENT_SECRET:T3YYAKNFDU2QMNLSPUN1WFMSRUR2N3IAICXLMXSHXB2TZTT0


Next is to find banks in the localities

In [10]:
# create url to find banks(categoryId = '4bf58dd8d48988d10a951735')
LIMIT = 100
radius = 500
categoryId = '4bf58dd8d48988d10a951735'

url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}&categoryId={}'.format(CLIENT_ID, CLIENT_SECRET, latitude, longitude, VERSION,  radius, LIMIT,categoryId)

In [11]:


def getNearbyIRs(names, latitudes, longitudes, radius=1000):
    venues_list=[]
    
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        
        url = 'https://api.foursquare.com/v2/venues/search?client_id={}&client_secret={}&ll={},{}&v={}&radius={}&limit={}&categoryId={}'.format(CLIENT_ID, CLIENT_SECRET, lat, lng, VERSION,  radius, LIMIT,categoryId)

        
        results = requests.get(url).json()["response"]['venues']
        
        venues_list.append([(
            name,
            v['id'],
            v['name'], 
            v['location']['lat'], 
            v['location']['lng'],
            v['categories'][0]['name'])for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Id',          
                  'Name', 
                  'Latitude', 
                  'Longitude',
                  'Category']
    
    return(nearby_venues)



In [12]:
ams_banks = getNearbyIRs(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

Calandlaan/Lelylaan
Osdorp Zuidoost
Osdorp Midden Noord
Osdorp Midden Zuid
Zuidwestkwadrant Osdorp Noord
Zuidwestkwadrant Osdorp Zuid
De Punt
Bedrijvencentrum Osdorp
Overtoomse Veld Noord
Rembrandtpark Noord
Rembrandtpark Zuid
Johan Jongkindbuurt
Lucas/Andreasziekenhuis e.o.
Koningin Wilhelminaplein
Andreasterrein
Delflandpleinbuurt Oost
Delflandpleinbuurt West
Riekerhaven
Schipluidenbuurt
Riekerpolder
Park Haagseweg
Nieuw Sloten Noordwest
Nieuw Sloten Noordoost
Belgiëplein e.o.
Nieuw Sloten Zuidwest
Nieuw Sloten Zuidoost
Hemonybuurt
Gerard Doubuurt
Frans Halsbuurt
Hercules Seghersbuurt
Sarphatiparkbuurt
Willibrordusbuurt
Van der Helstpleinbuurt
Lizzy Ansinghbuurt
Cornelis Troostbuurt
Surinamepleinbuurt
Kop Zeedijk
BG-terrein e.o.
Stationsplein e.o.
Hemelrijk
Spuistraat Noord
Nieuwe Kerk e.o.
Spuistraat Zuid
Kalverdriehoek
Langestraat e.o.
Leliegracht e.o.
Felix Meritisbuurt
Leidsegracht Noord
Spiegelbuurt
Gouden Bocht
Van Loonbuurt
Amstelveldbuurt
Rembrandtpleinbuurt
Scheepvaarthuisbu

In [13]:
ams_banks.head(10)

Unnamed: 0,Neighborhood,Id,Name,Latitude,Longitude,Category
0,Calandlaan/Lelylaan,4d0cb102f393224bbfbc17ee,ING Bank,52.35959,4.807111,Bank
1,Calandlaan/Lelylaan,52c17d21498e5c33d0175bff,Rabobank,52.359367,4.803372,Bank
2,Calandlaan/Lelylaan,4de385662271bfb844b46331,Abn Amro,52.35892,4.803708,Bank
3,Calandlaan/Lelylaan,4d0b52de109d6dcb255d8e8d,Rabobank Osdorp,52.359167,4.80337,Bank
4,Osdorp Zuidoost,4d0cb102f393224bbfbc17ee,ING Bank,52.35959,4.807111,Bank
5,Osdorp Zuidoost,4de385662271bfb844b46331,Abn Amro,52.35892,4.803708,Bank
6,Osdorp Zuidoost,52c17d21498e5c33d0175bff,Rabobank,52.359367,4.803372,Bank
7,Osdorp Zuidoost,4d0b52de109d6dcb255d8e8d,Rabobank Osdorp,52.359167,4.80337,Bank
8,Osdorp Zuidoost,4cffc881ffcea143a7822991,Rabobank Sloten,52.345316,4.811221,Bank
9,Osdorp Zuidoost,4daeb4221e7207bbeb15329f,Abn Amro Belgieplein,52.345644,4.811153,Bank


Find top 10 Neighborhoods have most banks

In [14]:

df1=ams_banks[['Id','Neighborhood']].groupby('Neighborhood').count()
df1 = df1.sort_values(by=['Id'], ascending=False).head(10)
df1.reset_index(inplace=True)
df1



Unnamed: 0,Neighborhood,Id
0,BG-terrein e.o.,38
1,Zuiderkerkbuurt,37
2,Nes e.o.,36
3,Reguliersbuurt,34
4,Rembrandtpleinbuurt,34
5,Spiegelbuurt,34
6,Amstelveldbuurt,33
7,Begijnhofbuurt,33
8,Weteringbuurt,33
9,Spuistraat Zuid,32


Find characteristics of Neighborhood using K means clustering



In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    try:
        for name, lat, lng in zip(names, latitudes, longitudes):
            print(name)

            # create the API request URL
            url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
                CLIENT_ID, 
                CLIENT_SECRET, 
                VERSION, 
                lat, 
                lng, 
                radius, 
                LIMIT)

            # make the GET request
            results = requests.get(url).json()["response"]['groups'][0]['items']

            # return only relevant information for each nearby venue
            venues_list.append([(
                name, 
                lat, 
                lng, 
                v['venue']['name'], 
                v['venue']['location']['lat'], 
                v['venue']['location']['lng'],  
                v['venue']['categories'][0]['name']) for v in results])

        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighborhood', 
                      'Neighborhood Latitude', 
                      'Neighborhood Longitude', 
                      'Venue', 
                      'Venue Latitude', 
                      'Venue Longitude', 
                      'Venue Category']

        return(nearby_venues)
    except:
        print("It stopped midway")

In [18]:
ams_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )



Calandlaan/Lelylaan
Osdorp Zuidoost
Osdorp Midden Noord
Osdorp Midden Zuid
Zuidwestkwadrant Osdorp Noord
Zuidwestkwadrant Osdorp Zuid
De Punt
Bedrijvencentrum Osdorp
Overtoomse Veld Noord
Rembrandtpark Noord
Rembrandtpark Zuid
Johan Jongkindbuurt
Lucas/Andreasziekenhuis e.o.
Koningin Wilhelminaplein
Andreasterrein
Delflandpleinbuurt Oost
Delflandpleinbuurt West
Riekerhaven
Schipluidenbuurt
Riekerpolder
Park Haagseweg
Nieuw Sloten Noordwest
Nieuw Sloten Noordoost
Belgiëplein e.o.
Nieuw Sloten Zuidwest
Nieuw Sloten Zuidoost
Hemonybuurt
Gerard Doubuurt
Frans Halsbuurt
Hercules Seghersbuurt
Sarphatiparkbuurt
Willibrordusbuurt
Van der Helstpleinbuurt
Lizzy Ansinghbuurt
Cornelis Troostbuurt
Surinamepleinbuurt
Kop Zeedijk
BG-terrein e.o.
Stationsplein e.o.
Hemelrijk
Spuistraat Noord
Nieuwe Kerk e.o.
Spuistraat Zuid
Kalverdriehoek
Langestraat e.o.
Leliegracht e.o.
Felix Meritisbuurt
Leidsegracht Noord
Spiegelbuurt
Gouden Bocht
Van Loonbuurt
Amstelveldbuurt
Rembrandtpleinbuurt
Scheepvaarthuisbu

In [19]:
print(ams_venues.shape)
ams_venues.head()

(15892, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Calandlaan/Lelylaan,52.355708,4.809697,Toko Bandung,52.354358,4.810843,Indonesian Restaurant
1,Calandlaan/Lelylaan,52.355708,4.809697,Enfes,52.354057,4.810545,Turkish Restaurant
2,Calandlaan/Lelylaan,52.355708,4.809697,Sportcentrum Caland,52.354371,4.807132,Gym / Fitness Center
3,Calandlaan/Lelylaan,52.355708,4.809697,De Meervaart,52.35897,4.807311,Theater
4,Calandlaan/Lelylaan,52.355708,4.809697,TK Maxx,52.359155,4.805335,Clothing Store


In [20]:
# OHE (One Hot encoding)
ams_onehot = pd.get_dummies(ams_venues[['Venue Category']], prefix="", prefix_sep="")

# adding neighborhood back to dataframe
ams_onehot['Neighborhood'] = ams_venues['Neighborhood'] 

# moving neighborhood to the first column
fixed_columns = [ams_onehot.columns[-1]] + list(ams_onehot.columns[:-1])
ams_onehot = ams_onehot[fixed_columns]

ams_onehot.head()



Unnamed: 0,Zoo Exhibit,Accessories Store,Adult Boutique,Advertising Agency,Afghan Restaurant,African Restaurant,American Restaurant,Antique Shop,Aquarium,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Arts & Entertainment,Asian Restaurant,Athletics & Sports,Australian Restaurant,Austrian Restaurant,Auto Dealership,Auto Garage,Auto Workshop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bar,Baseball Field,Basketball Court,Beach,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Belgian Restaurant,Bike Rental / Bike Share,Bike Shop,Bistro,Board Shop,Boarding House,Boat or Ferry,Bookstore,Botanical Garden,Boutique,Bowling Alley,Boxing Gym,Brasserie,Brazilian Restaurant,Breakfast Spot,Brewery,Bridal Shop,Bridge,Buffet,Building,Burger Joint,Burrito Place,Bus Line,Bus Station,Bus Stop,Business Service,Butcher,Cafeteria,Café,Camera Store,Campground,Canal,Canal Lock,Candy Store,Caribbean Restaurant,Casino,Cheese Shop,Child Care Service,Chinese Restaurant,Chocolate Shop,Church,Circus,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Arts Building,College Gym,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Construction & Landscaping,Convenience Store,Convention Center,Cosmetics Shop,Coworking Space,Creperie,Cruise,Cupcake Shop,Currency Exchange,Cycle Studio,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Discount Store,Distillery,Dive Bar,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Dutch Restaurant,Eastern European Restaurant,Electronics Store,Empanada Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Fabric Shop,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Fish & Chips Shop,Fish Market,Fishing Spot,Flea Market,Flower Shop,Fondue Restaurant,Food,Food & Drink Shop,Food Court,Food Service,Food Stand,Food Truck,Football Stadium,Forest,Fraternity House,French Restaurant,Fried Chicken Joint,Friterie,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Gaming Cafe,Garden,Garden Center,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Government Building,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Hawaiian Restaurant,Health & Beauty Service,Health Food Store,Herbs & Spices Store,Historic Site,History Museum,Hobby Shop,Hockey Field,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,IT Services,Ice Cream Shop,Indian Chinese Restaurant,Indian Restaurant,Indie Movie Theater,Indie Theater,Indonesian Restaurant,Indoor Play Area,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewelry Store,Juice Bar,Karaoke Bar,Kebab Restaurant,Kids Store,Korean Restaurant,Lake,Latin American Restaurant,Laundry Service,Lawyer,Lebanese Restaurant,Library,Lingerie Store,Liquor Store,Lounge,Luggage Store,Malay Restaurant,Marijuana Dispensary,Market,Martial Arts Dojo,Massage Studio,Maternity Clinic,Medical Center,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mini Golf,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Molecular Gastronomy Restaurant,Monument / Landmark,Moroccan Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Multiplex,Museum,Music School,Music Store,Music Venue,Nail Salon,Nature Preserve,Neighborhood,Nightclub,Noodle House,North Indian Restaurant,Notary,Office,Opera House,Optical Shop,Organic Grocery,Other Great Outdoors,Outdoor Sculpture,Outdoor Supply Store,Outdoors & Recreation,Palace,Paper / Office Supplies Store,Park,Parking,Pastry Shop,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pet Café,Pet Store,Pharmacy,Piano Bar,Pie Shop,Pier,Pilates Studio,Pizza Place,Planetarium,Platform,Playground,Plaza,Pool,Pool Hall,Pop-Up Shop,Portuguese Restaurant,Print Shop,Pub,Public Art,Racetrack,Ramen Restaurant,Record Shop,Recording Studio,Recreation Center,Rental Car Location,Rental Service,Resort,Rest Area,Restaurant,River,Road,Rock Climbing Spot,Rock Club,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Satay Restaurant,Scandinavian Restaurant,Scenic Lookout,Science Museum,Seafood Restaurant,Shoe Store,Shop & Service,Shopping Mall,Shopping Plaza,Skate Park,Skating Rink,Ski Area,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,South Indian Restaurant,Southern / Soul Food Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Sports Club,Stables,Stadium,Steakhouse,Storage Facility,Supermarket,Sushi Restaurant,Swiss Restaurant,Szechuan Restaurant,Taco Place,Tanning Salon,Tapas Restaurant,Tattoo Parlor,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Thrift / Vintage Store,Tibetan Restaurant,Tour Provider,Tourist Information Center,Toy / Game Store,Track,Trail,Train,Train Station,Tram Station,Travel & Transport,Tunnel,Turkish Restaurant,Vacation Rental,Vegetarian / Vegan Restaurant,Video Game Store,Video Store,Vietnamese Restaurant,Whisky Bar,Wine Bar,Wine Shop,Women's Store,Yoga Studio,Zoo
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Calandlaan/Lelylaan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Calandlaan/Lelylaan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Calandlaan/Lelylaan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Calandlaan/Lelylaan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,Calandlaan/Lelylaan,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
ams_grouped = ams_onehot.groupby('Neighborhood').mean().reset_index()


Now get the top 10 most common venues:

In [22]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [23]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ams_grouped['Neighborhood']

for ind in np.arange(ams_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ams_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(5)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,AMC,Coffee Shop,Convenience Store,Science Museum,Restaurant,Farmers Market,Empanada Restaurant,Ethiopian Restaurant,Event Space,Exhibit,Fabric Shop
1,Aalsmeerwegbuurt Oost,Supermarket,Bar,Restaurant,Breakfast Spot,Café,Plaza,Theater,Bagel Shop,Bakery,Burger Joint
2,Aalsmeerwegbuurt West,Supermarket,Plaza,Bar,Café,Bagel Shop,Bakery,Burger Joint,Restaurant,Falafel Restaurant,Fast Food Restaurant
3,Alexanderplein e.o.,Zoo Exhibit,Café,Supermarket,Restaurant,Theater,Hotel,Bar,Beer Garden,Monument / Landmark,African Restaurant
4,Alfa-driehoek,Boat or Ferry,Music Venue,Metro Station,Nightclub,Zoo,Fish & Chips Shop,Event Space,Exhibit,Fabric Shop,Falafel Restaurant


Now run k-means clustering. 
Note: due to API problems, 6 less localities could be stored in np array kmeans.labels_.
Since the original data frame had 481 localities and kmeans.labels_ had 475, I added zeroes to tail to avoid Type Error.
This needs to be fixed in future. 

In [24]:


kclusters = 5

ams_grouped_clustering = ams_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(ams_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]
A = np.append(kmeans.labels_, [0,0,0,0,0,0])
A.shape


(481,)

In [25]:


# add clustering labels
df['Cluster Labels'] = A

# merge manhattan_merged with neighborhoods_venues_sorted to add latitude/longitude for each neighborhood
df = df.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')


In [26]:
df2=df[['Neighborhood','Latitude', 'Longitude','Cluster Labels']]
df2.sort_values(['Cluster Labels'], ascending = False)

Unnamed: 0,Neighborhood,Latitude,Longitude,Cluster Labels
293,Middenmeer Zuid,52.35175,4.941994,4
437,Betondorp,52.340548,4.945275,4
26,Hemonybuurt,52.357005,4.902656,4
113,Landlust Zuid,52.377509,4.860611,4
381,Schinkelbuurt Noord,52.353648,4.854494,4
133,Sloterpark,52.366183,4.815385,4
383,Willemsparkbuurt Noord,52.35376,4.861742,4
275,Sporenburg,52.374152,4.947102,4
286,De Eenhoorn,52.350093,4.920238,4
63,Bloemgrachtbuurt,52.374446,4.880552,4


In [27]:
# create map of Amsterdam using latitude and longitude values
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=10)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to map
for lat, lng, neighborhood, cluster in zip(df2['Latitude'], df2['Longitude'], df2['Neighborhood'], df2['Cluster Labels']):
    label = folium.Popup(str(neighborhood) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters