# Neighborhood suggestion for vegetarian/vegan restaurant in Hamburg (Germany)

## Import all necessary libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes 
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

import re
print('Libraries imported.')

import requests

from bs4 import BeautifulSoup

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

## Import data table with Hamburg districts and quarters from wikipedia using beautiful Soup

In [2]:
page = requests.get('https://de.wikipedia.org/wiki/Liste_der_Bezirke_und_Stadtteile_Hamburgs').text

In [3]:
soup = BeautifulSoup(page,'lxml')

## Find second table of wikipeda page and get all rows and columns

In [4]:
My_table = soup.findAll("table", class_='wikitable')[1]
rows = My_table.find_all('tr')
columns = [v.text.replace('\n', '') for v in rows[0].find_all('th')]

## Append all values to a pandas data frame

In [5]:
df = pd.DataFrame(columns=columns)
for i in range(1, len(rows)):
               tds = rows[i].find_all('td')
# Clean and append all values to the pandas DataFrame
               values = [td.text.replace('\n','') for td in tds]         
               df = df.append(pd.Series(values, index=columns), ignore_index=True)
df.head()

Unnamed: 0,Stadtteil,Ortsteile,Bezirk,Fläche(km²),Einwohner,Bevölkerungsdichte(Einwohner/km²),Koordinaten,Karte
0,Hamburg-Altstadt!Hamburg-Altstadt,101!101–102,Hamburg-Mitte,"02,4!2,4",2272.0,947,"53° 33′ 0″ N, 10° 0′ 0″ O",
1,HafenCity!HafenCity,103!103–104,Hamburg-Mitte,"02,2!2,2",4592.0,2087,"53° 32′ 28″ N, 10° 0′ 1″ O",
2,Neustadt!Neustadt,105!105–108,Hamburg-Mitte,"02,3!2,3",12.92,5617,"53° 33′ 7″ N, 9° 59′ 8″ O",
3,St.Pauli!St. Pauli,109!109–112,Hamburg-Mitte,"02,5!2,5",22.436,8974,"53° 33′ 25″ N, 9° 57′ 50″ O",
4,St.Georg!St. Georg,113!113–114,Hamburg-Mitte,"02,4!2,4",11.384,4743,"53° 33′ 18″ N, 10° 0′ 44″ O",


## Clean up and customize the data frame, translate headers from German to English

In [6]:
df['District'] = df['Stadtteil'].str.split('!', 1).str[0]
df['District_No'] = df['Ortsteile'].str.split('!', 1).str[1]
df['Area_sqkm'] = df['Fläche(km²)'].str.split('!', 1).str[1]
df[['Latitude', 'Longitude']] = df.Koordinaten.str.split(',',1, expand=True); df
df['Longitude'] = df['Longitude'].str.replace('O', 'E', regex=False)
df['Latitude'] = df['Latitude'].str.replace(' ','')
df['Longitude'] = df['Longitude'].str.replace(' ','')
df = df.drop(columns=['Stadtteil','Ortsteile','Fläche(km²)','Karte'])
df = df.rename(columns={"Bezirk": "Borough", "Einwohner": "Inhabitants", "Bevölkerungsdichte(Einwohner/km²)": "Population_density_per_sqkm"})

In [7]:
df = df[['Borough','District_No', 'District', 'Inhabitants', 'Area_sqkm', 'Population_density_per_sqkm', 'Latitude', 'Longitude']]
df.head()

Unnamed: 0,Borough,District_No,District,Inhabitants,Area_sqkm,Population_density_per_sqkm,Latitude,Longitude
0,Hamburg-Mitte,101–102,Hamburg-Altstadt,2272.0,24,947,53° 33′ 0″ N,10° 0′ 0″ E
1,Hamburg-Mitte,103–104,HafenCity,4592.0,22,2087,53° 32′ 28″ N,10° 0′ 1″ E
2,Hamburg-Mitte,105–108,Neustadt,12.92,23,5617,53° 33′ 7″ N,9° 59′ 8″ E
3,Hamburg-Mitte,109–112,St.Pauli,22.436,25,8974,53° 33′ 25″ N,9° 57′ 50″ E
4,Hamburg-Mitte,113–114,St.Georg,11.384,24,4743,53° 33′ 18″ N,10° 0′ 44″ E


In [8]:
# get the geographical coordinates of Hamburg (Germany)
address = 'Hamburg, HH'

geolocator = Nominatim(user_agent="hh_explorer")
location = geolocator.geocode(address)
HHlatitude = location.latitude
HHlongitude = location.longitude
print('The geographical coordinate of Hamburg are {}, {}.'.format(HHlatitude, HHlongitude))

The geographical coordinate of Hamburg are 53.550341, 10.000654.


In [9]:
# check columns 'Longitude' and 'Latitude'
df.Longitude.head(),df.Latitude.head()

(0     10° 0′ 0″ E
 1     10° 0′ 1″ E
 2     9° 59′ 8″ E
 3    9° 57′ 50″ E
 4    10° 0′ 44″ E
 Name: Longitude, dtype: object, 0     53° 33′ 0″ N
 1    53° 32′ 28″ N
 2     53° 33′ 7″ N
 3    53° 33′ 25″ N
 4    53° 33′ 18″ N
 Name: Latitude, dtype: object)

In [10]:
# functions to convert longitude and latitude values to decimal coordinates

def dms2dd(degrees, minutes, seconds, direction):
    dd = float(degrees) + float(minutes)/60 + float(seconds)/(60*60);
    if direction == 'W' or direction == 'S':
        dd *= -1
    return dd;

def dd2dms(deg):
    d = int(deg)
    md = abs(deg - d) * 60
    m = int(md)
    sd = (md - m) * 60
    return [d, m, sd]

def parse_dms(dms):
    parts = re.split('[^\d\w]+', dms)
    lat = dms2dd(parts[0], parts[1], parts[2], parts[3])

    return (lat)

In [11]:
# add all decimal latitude and longitude values to lists
n = len(df)
LatList = []
LongList = []

for i in range(n):
  Lat_d = round(parse_dms(df.Latitude[i].replace(" ", "")),4)
  Long_d = round(parse_dms(df.Longitude[i].replace(" ", "")),4)
  LatList.append(Lat_d)   
  LongList.append(Long_d)
    
# add the lists with decimal values to the existing data frame
df['Lat'] = LatList
df['Long'] = LongList

df.head()

Unnamed: 0,Borough,District_No,District,Inhabitants,Area_sqkm,Population_density_per_sqkm,Latitude,Longitude,Lat,Long
0,Hamburg-Mitte,101–102,Hamburg-Altstadt,2272.0,24,947,53° 33′ 0″ N,10° 0′ 0″ E,53.55,10.0
1,Hamburg-Mitte,103–104,HafenCity,4592.0,22,2087,53° 32′ 28″ N,10° 0′ 1″ E,53.5411,10.0003
2,Hamburg-Mitte,105–108,Neustadt,12.92,23,5617,53° 33′ 7″ N,9° 59′ 8″ E,53.5519,9.9856
3,Hamburg-Mitte,109–112,St.Pauli,22.436,25,8974,53° 33′ 25″ N,9° 57′ 50″ E,53.5569,9.9639
4,Hamburg-Mitte,113–114,St.Georg,11.384,24,4743,53° 33′ 18″ N,10° 0′ 44″ E,53.555,10.0122


## Create Hamburg map and add markers for districts to get a first overview

In [12]:
# create map of Hamburg using latitude and longitude values
map_hamburg = folium.Map(location=[HHlatitude, HHlongitude], zoom_start=11)

# add markers to Hamburg map
for Lat, Long, Borough, District in zip(df['Lat'], df['Long'], df['Borough'], df['District']):
    label = '{}, {}'.format(District, Borough)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [Lat, Long],
        popup=label).add_to(map_hamburg)
   
map_hamburg

## Generate a subset of the data frame

In [13]:
# convert population density to int
df.Population_density_per_sqkm = df.Population_density_per_sqkm.astype(int)
# generate a data frame which contains central boroughs only and a population density >4000 per km²
df_central_distr = df[df.Borough.isin(['Hamburg-Mitte','Altona']) & (df.Population_density_per_sqkm >= 4000)].reset_index(drop=True)

In [14]:
df_central_distr

Unnamed: 0,Borough,District_No,District,Inhabitants,Area_sqkm,Population_density_per_sqkm,Latitude,Longitude,Lat,Long
0,Hamburg-Mitte,105–108,Neustadt,12.92,23,5617,53° 33′ 7″ N,9° 59′ 8″ E,53.5519,9.9856
1,Hamburg-Mitte,109–112,St.Pauli,22.436,25,8974,53° 33′ 25″ N,9° 57′ 50″ E,53.5569,9.9639
2,Hamburg-Mitte,113–114,St.Georg,11.384,24,4743,53° 33′ 18″ N,10° 0′ 44″ E,53.555,10.0122
3,Hamburg-Mitte,119–120,Borgfelde,7696.0,8,9620,53° 33′ 17″ N,10° 2′ 4″ E,53.5547,10.0344
4,Hamburg-Mitte,121–127,Hamm,38.773,40,9693,53° 33′ 39″ N,10° 3′ 28″ E,53.5608,10.0578
5,Hamburg-Mitte,128–129,Horn,38.799,59,6576,53° 33′ 14″ N,10° 5′ 24″ E,53.5539,10.09
6,Hamburg-Mitte,130,Billstedt,70.355,170,4139,53° 32′ 26″ N,10° 6′ 4″ E,53.5406,10.1011
7,Altona,201–206,Altona-Altstadt,29.305,28,10466,53° 32′ 56″ N,9° 56′ 52″ E,53.5489,9.9478
8,Altona,207,Sternschanze,8095.0,5,16190,53° 33′ 42″ N,9° 57′ 44″ E,53.5617,9.9622
9,Altona,208–210,Altona-Nord,24.153,22,10979,53° 33′ 47″ N,9° 56′ 43″ E,53.5631,9.9453


In [15]:
# create a map of the district subset
map_hamburg = folium.Map(location=[HHlatitude, HHlongitude], zoom_start=12)

for Lat, Long, Borough, District in zip(df_central_distr['Lat'], df_central_distr['Long'], df_central_distr['Borough'], df_central_distr['District']):
    label = '{}, {}'.format(District, Borough)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [Lat, Long],
        popup=label).add_to(map_hamburg)       

map_hamburg

## Use Foursquare to explore the neighborhoods of central Hamburg districts

In [16]:
CLIENT_ID = 'D3BGST0IOL3VDGWJD35EXZTISHC3Z42RUGX2UWOQ5NUCWXTN' # your Foursquare ID
CLIENT_SECRET = 'K1GLHABDR3IG5MSNTMZHPSZWXRXMQ55IXK0NUQH1SZ4HXPLM' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: D3BGST0IOL3VDGWJD35EXZTISHC3Z42RUGX2UWOQ5NUCWXTN
CLIENT_SECRET:K1GLHABDR3IG5MSNTMZHPSZWXRXMQ55IXK0NUQH1SZ4HXPLM


In [17]:
df_central_distr.loc[0, 'District'],df_central_distr.loc[0, 'Lat'],df_central_distr.loc[0, 'Long']

('Neustadt', 53.5519, 9.9856)

In [18]:
# 150 venues that are in Neustadt within a radius of 500 meters
LIMIT = 150
radius = 1000
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    df_central_distr.loc[0, 'Lat'], 
    df_central_distr.loc[0, 'Long'], 
    radius, 
    LIMIT)
url # display URL


'https://api.foursquare.com/v2/venues/explore?&client_id=D3BGST0IOL3VDGWJD35EXZTISHC3Z42RUGX2UWOQ5NUCWXTN&client_secret=K1GLHABDR3IG5MSNTMZHPSZWXRXMQ55IXK0NUQH1SZ4HXPLM&v=20180605&ll=53.5519,9.9856&radius=1000&limit=150'

In [19]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5dc3dd232619ee002cb8ecb3'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Neustadt',
  'headerFullLocation': 'Neustadt, Hamburg',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 241,
  'suggestedBounds': {'ne': {'lat': 53.560900009000015,
    'lng': 10.00072083791351},
   'sw': {'lat': 53.54289999099999, 'lng': 9.97047916208649}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '52e12f23498eab3870ffa96c',
       'name': 'Public Coffee Roasters',
       'location': {'address': 'Wexstr. 28',
        'lat': 53.551372,
        'lng': 9.982597,
        'labeledLatLngs': [{'label': 'display',
          'lat': 53.551372,
          'ln

In [20]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [21]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Public Coffee Roasters,Coffee Shop,53.551372,9.982597
1,Tortue,Hotel,53.551437,9.986531
2,Die Bank,Restaurant,53.553501,9.987226
3,Zum Spätzle,Swabian Restaurant,53.551033,9.981608
4,Kardelen,Turkish Restaurant,53.550667,9.982271


In [22]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


In [23]:
#function to repeat the venues for all districts in central Hamburg
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['District', 
                  'District_Latitude', 
                  'District_Longitude', 
                  'Venue', 
                  'Venue_Latitude', 
                  'Venue_Longitude', 
                  'Venue_Category']
    
    return(nearby_venues)

In [24]:
central_venues = getNearbyVenues(names=df_central_distr['District'],
                                   latitudes=df_central_distr['Lat'],
                                   longitudes=df_central_distr['Long']
                                  )

Neustadt
St.Pauli
St.Georg
Borgfelde
Hamm
Horn
Billstedt
Altona-Altstadt
Sternschanze
Altona-Nord
Ottensen
Groß Flottbek
Lurup
Iserbrook


In [25]:
# size of the resulting dataframe
print(central_venues.shape)
central_venues

(486, 7)


Unnamed: 0,District,District_Latitude,District_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,Neustadt,53.5519,9.9856,Public Coffee Roasters,53.551372,9.982597,Coffee Shop
1,Neustadt,53.5519,9.9856,Tortue,53.551437,9.986531,Hotel
2,Neustadt,53.5519,9.9856,Die Bank,53.553501,9.987226,Restaurant
3,Neustadt,53.5519,9.9856,Kardelen,53.550667,9.982271,Turkish Restaurant
4,Neustadt,53.5519,9.9856,Zum Spätzle,53.551033,9.981608,Swabian Restaurant
5,Neustadt,53.5519,9.9856,Bootshaus Kombüse,53.550701,9.982247,Restaurant
6,Neustadt,53.5519,9.9856,Jin Gui,53.551486,9.986556,Asian Restaurant
7,Neustadt,53.5519,9.9856,Erste Liebe,53.549359,9.985087,Café
8,Neustadt,53.5519,9.9856,Marinehof,53.549374,9.985253,German Restaurant
9,Neustadt,53.5519,9.9856,Karlsons,53.550394,9.982171,Scandinavian Restaurant


## Get the venues in central Hamburg with categories "Vegetarian/Vegan- and Falafel Restaurant" only

In [35]:
df_central_veg = central_venues[central_venues.Venue_Category.isin(['Falafel Restaurant','Vegetarian / Vegan Restaurant'])].reset_index(drop=True)
df_central_veg

Unnamed: 0,District,District_Latitude,District_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,Neustadt,53.5519,9.9856,Nasch,53.555528,9.984067,Vegetarian / Vegan Restaurant
1,Neustadt,53.5519,9.9856,Loving Hut,53.551646,9.980435,Vegetarian / Vegan Restaurant
2,St.Pauli,53.5569,9.9639,Azeitona,53.558792,9.965223,Falafel Restaurant
3,St.Pauli,53.5569,9.9639,Happenpappen,53.557484,9.970221,Vegetarian / Vegan Restaurant
4,St.Georg,53.555,10.0122,Falafel L'Amira,53.554242,10.014367,Falafel Restaurant
5,St.Georg,53.555,10.0122,Café Koppel,53.558304,10.01178,Vegetarian / Vegan Restaurant
6,St.Georg,53.555,10.0122,Loving Hut,53.553915,10.019435,Vegetarian / Vegan Restaurant
7,Sternschanze,53.5617,9.9622,KIMO,53.564308,9.965746,Falafel Restaurant
8,Sternschanze,53.5617,9.9622,Azeitona,53.558792,9.965223,Falafel Restaurant
9,Sternschanze,53.5617,9.9622,Falafelstern,53.564272,9.965629,Falafel Restaurant


In [42]:
#delete double entry 'Azeitona'
df_central_veg.drop(df_central_veg.index[8]).reset_index(drop=True)

Unnamed: 0,District,District_Latitude,District_Longitude,Venue,Venue_Latitude,Venue_Longitude,Venue_Category
0,Neustadt,53.5519,9.9856,Nasch,53.555528,9.984067,Vegetarian / Vegan Restaurant
1,Neustadt,53.5519,9.9856,Loving Hut,53.551646,9.980435,Vegetarian / Vegan Restaurant
2,St.Pauli,53.5569,9.9639,Azeitona,53.558792,9.965223,Falafel Restaurant
3,St.Pauli,53.5569,9.9639,Happenpappen,53.557484,9.970221,Vegetarian / Vegan Restaurant
4,St.Georg,53.555,10.0122,Falafel L'Amira,53.554242,10.014367,Falafel Restaurant
5,St.Georg,53.555,10.0122,Café Koppel,53.558304,10.01178,Vegetarian / Vegan Restaurant
6,St.Georg,53.555,10.0122,Loving Hut,53.553915,10.019435,Vegetarian / Vegan Restaurant
7,Sternschanze,53.5617,9.9622,KIMO,53.564308,9.965746,Falafel Restaurant
8,Sternschanze,53.5617,9.9622,Falafelstern,53.564272,9.965629,Falafel Restaurant
9,Sternschanze,53.5617,9.9622,Mamalicious,53.56368,9.959807,Vegetarian / Vegan Restaurant


In [43]:
# print size of the vegetarian/vegan restaurants data frame
print(df_central_veg.shape)

(12, 7)


In [44]:
# create map of Hamburg using latitude and longitude values
map_hamburg_veg = folium.Map(location=[HHlatitude, HHlongitude], zoom_start=13)

# add markers map
for Venue_Latitude, Venue_Longitude, District, Venue in zip(df_central_veg['Venue_Latitude'], df_central_veg['Venue_Longitude'], df_central_veg['District'], df_central_veg['Venue']):
    label = '{}, {}'.format(Venue, District)
    label = folium.Popup(label, parse_html=True)
    folium.Marker(
        [Venue_Latitude, Venue_Longitude],
        popup=label).add_to(map_hamburg_veg)
   
map_hamburg_veg

In [48]:
# how many venues were returned for each district?
central_venues.groupby('District').count()

Unnamed: 0_level_0,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Altona-Altstadt,36,36,36,36,36,36
Altona-Nord,25,25,25,25,25,25
Billstedt,29,29,29,29,29,29
Borgfelde,9,9,9,9,9,9
Groß Flottbek,10,10,10,10,10,10
Hamm,11,11,11,11,11,11
Horn,7,7,7,7,7,7
Iserbrook,4,4,4,4,4,4
Lurup,3,3,3,3,3,3
Neustadt,100,100,100,100,100,100


In [58]:
print('There are {} uniques categories.'.format(len(central_venues['Venue Category'].unique())))

There are 142 uniques categories.


In [59]:
# one hot encoding
hamburg_central_onehot = pd.get_dummies(central_venues[['Venue Category']], prefix="", prefix_sep="")

# add district column back to dataframe
hamburg_central_onehot['District'] = central_venues['District'] 

# move district column to the first column
fixed_columns = [hamburg_central_onehot.columns[-1]] + list(hamburg_central_onehot.columns[:-1])
hamburg_central_onehot = hamburg_central_onehot[fixed_columns]

hamburg_central_onehot.head()

Unnamed: 0,District,Accessories Store,Arcade,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Austrian Restaurant,BBQ Joint,Bakery,Bar,Bavarian Restaurant,Beach Bar,Beer Bar,Beer Store,Big Box Store,Bike Shop,Board Shop,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Café,Cheese Shop,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Convenience Store,Cosmetics Shop,Currywurst Joint,Department Store,Design Studio,Dessert Shop,Doner Restaurant,Drugstore,Electronics Store,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food & Drink Shop,Football Stadium,French Restaurant,Furniture / Home Store,Gastropub,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Kumpir Restaurant,Leather Goods Store,Light Rail Station,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Music Venue,Neighborhood,Nightclub,Optical Shop,Organic Grocery,Other Nightlife,Palatine Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Portuguese Restaurant,Pub,Record Shop,Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Souvlaki Shop,Spa,Sporting Goods Shop,Sports Club,Sri Lankan Restaurant,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Swabian Restaurant,Tapas Restaurant,Taverna,Thai Restaurant,Theater,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,Neustadt,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Neustadt,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Neustadt,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Neustadt,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
4,Neustadt,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [60]:
hamburg_central_onehot.shape

(491, 143)

In [61]:
hamburg_grouped = hamburg_central_onehot.groupby('District').mean().reset_index()
hamburg_grouped

Unnamed: 0,District,Accessories Store,Arcade,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Austrian Restaurant,BBQ Joint,Bakery,Bar,Bavarian Restaurant,Beach Bar,Beer Bar,Beer Store,Big Box Store,Bike Shop,Board Shop,Bookstore,Boutique,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Café,Cheese Shop,Chinese Restaurant,Church,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Convenience Store,Cosmetics Shop,Currywurst Joint,Department Store,Design Studio,Dessert Shop,Doner Restaurant,Drugstore,Electronics Store,Fabric Shop,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Flower Shop,Food & Drink Shop,Football Stadium,French Restaurant,Furniture / Home Store,Gastropub,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Halal Restaurant,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Kumpir Restaurant,Leather Goods Store,Light Rail Station,Liquor Store,Lounge,Market,Mediterranean Restaurant,Men's Store,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Music Venue,Neighborhood,Nightclub,Optical Shop,Organic Grocery,Other Nightlife,Palatine Restaurant,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Persian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Portuguese Restaurant,Pub,Record Shop,Restaurant,Road,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Seafood Restaurant,Shipping Store,Shoe Store,Shopping Mall,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Souvlaki Shop,Spa,Sporting Goods Shop,Sports Club,Sri Lankan Restaurant,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Swabian Restaurant,Tapas Restaurant,Taverna,Thai Restaurant,Theater,Trail,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Venezuelan Restaurant,Video Store,Vietnamese Restaurant,Wine Bar,Wine Shop
0,Altona-Altstadt,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.083333,0.0,0.027778,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.027778,0.0,0.027778,0.0,0.055556,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Altona-Nord,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0
2,Billstedt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.103448,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.034483,0.034483,0.034483,0.034483,0.0,0.103448,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.068966,0.0,0.034483,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0
3,Borgfelde,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.222222,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0
4,Groß Flottbek,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Hamm,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.181818,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Horn,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.285714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Iserbrook,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Lurup,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Neustadt,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.03,0.01,0.0,0.01,0.01,0.0,0.0,0.06,0.0,0.0,0.0,0.04,0.0,0.05,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.04,0.02,0.01,0.03,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.06,0.01,0.01,0.0,0.0,0.01,0.04,0.01,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.03,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.03,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.02,0.0,0.0,0.0,0.01,0.0


In [62]:
hamburg_grouped.shape

(14, 143)

In [63]:
num_top_venues = 5

for hood in hamburg_grouped['District']:
    print("----"+hood+"----")
    temp = hamburg_grouped[hamburg_grouped['District'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Altona-Altstadt----
                venue  freq
0               Hotel  0.08
1          Restaurant  0.08
2                 Pub  0.06
3              Bakery  0.06
4  Italian Restaurant  0.06


----Altona-Nord----
               venue  freq
0          Nightclub  0.08
1     Shipping Store  0.04
2   Football Stadium  0.04
3  French Restaurant  0.04
4              Plaza  0.04


----Billstedt----
                  venue  freq
0        Clothing Store  0.10
1  Fast Food Restaurant  0.10
2           Supermarket  0.07
3    Seafood Restaurant  0.07
4                   Gym  0.03


----Borgfelde----
              venue  freq
0             Hotel  0.22
1        Restaurant  0.11
2              Park  0.11
3              Café  0.11
4  Asian Restaurant  0.11


----Groß Flottbek----
               venue  freq
0        Supermarket   0.2
1         Restaurant   0.1
2             Bakery   0.1
3               Café   0.1
4  German Restaurant   0.1


----Hamm----
                venue  freq
0              Bake

In [64]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [69]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['District'] = hamburg_grouped['District']

for ind in np.arange(hamburg_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(hamburg_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Altona-Altstadt,Restaurant,Hotel,Pub,Seafood Restaurant,Italian Restaurant,Bakery,Performing Arts Venue,Cocktail Bar,Plaza,Pizza Place
1,Altona-Nord,Nightclub,Street Food Gathering,Comedy Club,Shipping Store,Brewery,Football Stadium,French Restaurant,German Restaurant,Big Box Store,Restaurant
2,Billstedt,Clothing Store,Fast Food Restaurant,Seafood Restaurant,Supermarket,Ice Cream Shop,Mediterranean Restaurant,Cosmetics Shop,Restaurant,Pastry Shop,Drugstore
3,Borgfelde,Hotel,Restaurant,Video Store,Gastropub,Asian Restaurant,Theater,Café,Park,Wine Shop,Fast Food Restaurant
4,Groß Flottbek,Supermarket,Restaurant,Farmers Market,German Restaurant,Café,Hotel,Pharmacy,Bakery,Grocery Store,Flower Shop
5,Hamm,Bakery,Italian Restaurant,Drugstore,Fast Food Restaurant,Café,Soccer Field,Park,Greek Restaurant,German Restaurant,Light Rail Station
6,Horn,Supermarket,Doner Restaurant,Drugstore,Park,Grocery Store,Bus Stop,Wine Shop,Flower Shop,Football Stadium,Food & Drink Shop
7,Iserbrook,Bakery,Pet Store,Supermarket,Organic Grocery,Drugstore,Electronics Store,Fabric Shop,Falafel Restaurant,Doner Restaurant,Furniture / Home Store
8,Lurup,Supermarket,Playground,Bus Stop,Wine Shop,Fast Food Restaurant,French Restaurant,Football Stadium,Food & Drink Shop,Flower Shop,Flea Market
9,Neustadt,Café,Hotel,Coffee Shop,French Restaurant,Italian Restaurant,Clothing Store,German Restaurant,Cosmetics Shop,Restaurant,Sushi Restaurant


In [92]:
# set number of clusters
kclusters = 4
hamburg_grouped_clustering = hamburg_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(hamburg_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 0, 0, 0, 0, 0, 3, 2, 1, 0], dtype=int32)

In [94]:
# add clustering labels
#neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

hamburg_merged = df_central_distr

hamburg_merged = hamburg_merged.join(neighborhoods_venues_sorted.set_index('District'), on='District')

hamburg_merged # check the last columns!

Unnamed: 0,Borough,District_No,District,Inhabitants,Area_sqkm,Population_density_per_sqkm,Latitude,Longitude,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Hamburg-Mitte,105–108,Neustadt,12.92,23,5617,53° 33′ 7″ N,9° 59′ 8″ E,53.5519,9.9856,0,Café,Hotel,Coffee Shop,French Restaurant,Italian Restaurant,Clothing Store,German Restaurant,Cosmetics Shop,Restaurant,Sushi Restaurant
1,Hamburg-Mitte,109–112,St.Pauli,22.436,25,8974,53° 33′ 25″ N,9° 57′ 50″ E,53.5569,9.9639,0,Bar,Café,Coffee Shop,Seafood Restaurant,Soccer Stadium,Sporting Goods Shop,Bookstore,Austrian Restaurant,Music Venue,Hostel
2,Hamburg-Mitte,113–114,St.Georg,11.384,24,4743,53° 33′ 18″ N,10° 0′ 44″ E,53.555,10.0122,0,Italian Restaurant,Hotel,Café,Cocktail Bar,Coffee Shop,Indian Restaurant,French Restaurant,Bakery,Restaurant,Burger Joint
3,Hamburg-Mitte,119–120,Borgfelde,7696.0,8,9620,53° 33′ 17″ N,10° 2′ 4″ E,53.5547,10.0344,4,Hotel,Restaurant,Video Store,Gastropub,Asian Restaurant,Theater,Café,Park,Wine Shop,Fast Food Restaurant
4,Hamburg-Mitte,121–127,Hamm,38.773,40,9693,53° 33′ 39″ N,10° 3′ 28″ E,53.5608,10.0578,0,Bakery,Italian Restaurant,Drugstore,Fast Food Restaurant,Café,Soccer Field,Park,Greek Restaurant,German Restaurant,Light Rail Station
5,Hamburg-Mitte,128–129,Horn,38.799,59,6576,53° 33′ 14″ N,10° 5′ 24″ E,53.5539,10.09,3,Supermarket,Doner Restaurant,Drugstore,Park,Grocery Store,Bus Stop,Wine Shop,Flower Shop,Football Stadium,Food & Drink Shop
6,Hamburg-Mitte,130,Billstedt,70.355,170,4139,53° 32′ 26″ N,10° 6′ 4″ E,53.5406,10.1011,0,Clothing Store,Fast Food Restaurant,Seafood Restaurant,Supermarket,Ice Cream Shop,Mediterranean Restaurant,Cosmetics Shop,Restaurant,Pastry Shop,Drugstore
7,Altona,201–206,Altona-Altstadt,29.305,28,10466,53° 32′ 56″ N,9° 56′ 52″ E,53.5489,9.9478,0,Restaurant,Hotel,Pub,Seafood Restaurant,Italian Restaurant,Bakery,Performing Arts Venue,Cocktail Bar,Plaza,Pizza Place
8,Altona,207,Sternschanze,8095.0,5,16190,53° 33′ 42″ N,9° 57′ 44″ E,53.5617,9.9622,0,Bar,Coffee Shop,Café,Clothing Store,Restaurant,Gastropub,Falafel Restaurant,Pizza Place,Cocktail Bar,Burger Joint
9,Altona,208–210,Altona-Nord,24.153,22,10979,53° 33′ 47″ N,9° 56′ 43″ E,53.5631,9.9453,0,Nightclub,Street Food Gathering,Comedy Club,Shipping Store,Brewery,Football Stadium,French Restaurant,German Restaurant,Big Box Store,Restaurant


In [98]:
# create map
map_clusters = folium.Map(location=[HHlatitude, HHlongitude], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(hamburg_merged['Lat'], hamburg_merged['Long'], hamburg_merged['District'], hamburg_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [99]:
# Cluster 1
hamburg_merged.loc[hamburg_merged['Cluster Labels'] == 0, hamburg_merged.columns[[2] + list(range(6, hamburg_merged.shape[1]))]]

Unnamed: 0,District,Latitude,Longitude,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Neustadt,53° 33′ 7″ N,9° 59′ 8″ E,53.5519,9.9856,0,Café,Hotel,Coffee Shop,French Restaurant,Italian Restaurant,Clothing Store,German Restaurant,Cosmetics Shop,Restaurant,Sushi Restaurant
1,St.Pauli,53° 33′ 25″ N,9° 57′ 50″ E,53.5569,9.9639,0,Bar,Café,Coffee Shop,Seafood Restaurant,Soccer Stadium,Sporting Goods Shop,Bookstore,Austrian Restaurant,Music Venue,Hostel
2,St.Georg,53° 33′ 18″ N,10° 0′ 44″ E,53.555,10.0122,0,Italian Restaurant,Hotel,Café,Cocktail Bar,Coffee Shop,Indian Restaurant,French Restaurant,Bakery,Restaurant,Burger Joint
4,Hamm,53° 33′ 39″ N,10° 3′ 28″ E,53.5608,10.0578,0,Bakery,Italian Restaurant,Drugstore,Fast Food Restaurant,Café,Soccer Field,Park,Greek Restaurant,German Restaurant,Light Rail Station
6,Billstedt,53° 32′ 26″ N,10° 6′ 4″ E,53.5406,10.1011,0,Clothing Store,Fast Food Restaurant,Seafood Restaurant,Supermarket,Ice Cream Shop,Mediterranean Restaurant,Cosmetics Shop,Restaurant,Pastry Shop,Drugstore
7,Altona-Altstadt,53° 32′ 56″ N,9° 56′ 52″ E,53.5489,9.9478,0,Restaurant,Hotel,Pub,Seafood Restaurant,Italian Restaurant,Bakery,Performing Arts Venue,Cocktail Bar,Plaza,Pizza Place
8,Sternschanze,53° 33′ 42″ N,9° 57′ 44″ E,53.5617,9.9622,0,Bar,Coffee Shop,Café,Clothing Store,Restaurant,Gastropub,Falafel Restaurant,Pizza Place,Cocktail Bar,Burger Joint
9,Altona-Nord,53° 33′ 47″ N,9° 56′ 43″ E,53.5631,9.9453,0,Nightclub,Street Food Gathering,Comedy Club,Shipping Store,Brewery,Football Stadium,French Restaurant,German Restaurant,Big Box Store,Restaurant
10,Ottensen,53° 33′ 14″ N,9° 55′ 4″ E,53.5539,9.9178,0,Soccer Field,Bus Stop,Ice Cream Shop,Playground,Bakery,Café,Church,Grocery Store,Taverna,Italian Restaurant
11,Groß Flottbek,53° 33′ 55″ N,9° 52′ 39″ E,53.5653,9.8775,0,Supermarket,Restaurant,Farmers Market,German Restaurant,Café,Hotel,Pharmacy,Bakery,Grocery Store,Flower Shop


In [100]:
# Cluster 2
hamburg_merged.loc[hamburg_merged['Cluster Labels'] == 1, hamburg_merged.columns[[2] + list(range(6, hamburg_merged.shape[1]))]]

Unnamed: 0,District,Latitude,Longitude,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
12,Lurup,53° 35′ 35″ N,9° 52′ 58″ E,53.5931,9.8828,1,Supermarket,Playground,Bus Stop,Wine Shop,Fast Food Restaurant,French Restaurant,Football Stadium,Food & Drink Shop,Flower Shop,Flea Market


In [101]:
# Cluster 3
hamburg_merged.loc[hamburg_merged['Cluster Labels'] == 2, hamburg_merged.columns[[2] + list(range(6, hamburg_merged.shape[1]))]]

Unnamed: 0,District,Latitude,Longitude,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
13,Iserbrook,53° 34′ 35″ N,9° 49′ 24″ E,53.5764,9.8233,2,Bakery,Pet Store,Supermarket,Organic Grocery,Drugstore,Electronics Store,Fabric Shop,Falafel Restaurant,Doner Restaurant,Furniture / Home Store


In [102]:
# Cluster 4
hamburg_merged.loc[hamburg_merged['Cluster Labels'] == 4, hamburg_merged.columns[[2] + list(range(6, hamburg_merged.shape[1]))]]

Unnamed: 0,District,Latitude,Longitude,Lat,Long,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
3,Borgfelde,53° 33′ 17″ N,10° 2′ 4″ E,53.5547,10.0344,4,Hotel,Restaurant,Video Store,Gastropub,Asian Restaurant,Theater,Café,Park,Wine Shop,Fast Food Restaurant
