# Stuttgart Neighborhoods

### Import libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
import bs4 # BeautifulSoup for webscraping
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe
import geocoder # for requesting coordinates via the google package

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

import os

import collections

# Get the neighborhoods and districts of Stuttgart, Germany from Wikipedia via webscraping

URL to be used: https://de.wikipedia.org/wiki/Liste_der_Stadtbezirke_und_Stadtteile_von_Stuttgart

- Assign the above URL to the variable "wiki_stuttgart_list_url"

- Send the GET request and save the result in the variable "res"

- Create a BeautifulSoup object from the result text (res.text) called "soup"

In [2]:
wiki_stuttgart_list_url = 'https://de.wikipedia.org/wiki/Liste_der_Stadtbezirke_und_Stadtteile_von_Stuttgart'
res = requests.get(wiki_stuttgart_list_url)
soup = bs4.BeautifulSoup(res.text,"lxml")

- get the names of the districts of Stuttgart
- first find out which table shall be used to find out all the names of the districts
- after going through all the tables, I found out that the table at index 2 shall be used to get the names

In [3]:
tables = soup.findAll('table')
table_with_names = tables[2]
stuttgart_districts_names = []
names_with_brackets = ["Birkach",
                       "Feuerbach",
                       "Möhringen",
                       "Mühlhausen",
                       "Münster",
                       "Stammheim",
                       "Vaihingen",
                       "Wangen"]
names_for_webscraping = []

for idx, row in enumerate(table_with_names.findAll('td')):
    if row.a:
        if row.a.text:
            if " " in row.a.text:
                tmp = row.a.text.replace(" ", "_")
                stuttgart_districts_names.append(tmp)
            else:
                stuttgart_districts_names.append(row.a.text)
            
# since Birkachs URL is Birkach_(Stuttgart), we will replace it accordingly
for idx, name in enumerate(stuttgart_districts_names):
    if name in names_with_brackets:
        names_for_webscraping.append(f"{name}_(Stuttgart)")
    else:
        names_for_webscraping.append(name)

print("size: ", len(names_for_webscraping))
names_for_webscraping

size:  23


['Stuttgart-Mitte',
 'Stuttgart-Nord',
 'Stuttgart-Ost',
 'Stuttgart-Süd',
 'Stuttgart-West',
 'Bad_Cannstatt',
 'Birkach_(Stuttgart)',
 'Botnang',
 'Degerloch',
 'Feuerbach_(Stuttgart)',
 'Hedelfingen',
 'Möhringen_(Stuttgart)',
 'Mühlhausen_(Stuttgart)',
 'Münster_(Stuttgart)',
 'Obertürkheim',
 'Plieningen',
 'Sillenbuch',
 'Stammheim_(Stuttgart)',
 'Untertürkheim',
 'Vaihingen_(Stuttgart)',
 'Wangen_(Stuttgart)',
 'Weilimdorf',
 'Zuffenhausen']

Now that we have our names, we want to find out the postal codes of the districts:

In [4]:
postal_codes_list = []

for district in names_for_webscraping:
    wiki_stuttgart_district_url = f"https://de.wikipedia.org/wiki/{district}"
    res_district = requests.get(wiki_stuttgart_district_url)
    soup_district = bs4.BeautifulSoup(res_district.text,"lxml")
    
    table_district = soup_district.find('table')
    for idx, row in enumerate(table_district.findAll('td')):
        if row.text.startswith("70"):
            removed_newline = row.text.rstrip("\n")
            removed_other = removed_newline.rstrip("\xa0ha")
            if (len(removed_other) > 4):
                postal_codes_list.append(removed_other)
        
postal_codes_list

['70173, 70174, 70178, 70182',
 '70174, 70191, 70192',
 '70148–70190',
 '70178, 70180, 70184, 70199, 70569',
 '70176, 70178, 70193, 70197',
 '70331–70378',
 '70599',
 '70195',
 '70597',
 '70469, 70499',
 '70329',
 '70567, 70565',
 '70378, 70437',
 '70376',
 '70329',
 '70599',
 '70619',
 '70439',
 '70327',
 '70563, 70565, 70569',
 '70327',
 '70499',
 '70435, 70437, 70439']

For districts with hyphen in the postal code we will concentrate on the 5 biggest postal codes:

In [5]:
postal_codes_bad_cannstatt_pre = "70331–70378"
postal_codes_bad_cannstatt_top_5 = "70332, 70372, 70374, 70376, 70378"
postal_codes_stuttgart_ost_pre = "70148–70190"
postal_codes_stuttgart_ost_top_5 = "70184, 70186, 70188, 70190, 70327"

for idx,code in enumerate(postal_codes_list):
    
    if code == postal_codes_bad_cannstatt_pre:
        postal_codes_list[idx] = postal_codes_bad_cannstatt_top_5
    elif code == postal_codes_stuttgart_ost_pre:
        postal_codes_list[idx] = postal_codes_stuttgart_ost_top_5
        

postal_codes_list

['70173, 70174, 70178, 70182',
 '70174, 70191, 70192',
 '70184, 70186, 70188, 70190, 70327',
 '70178, 70180, 70184, 70199, 70569',
 '70176, 70178, 70193, 70197',
 '70332, 70372, 70374, 70376, 70378',
 '70599',
 '70195',
 '70597',
 '70469, 70499',
 '70329',
 '70567, 70565',
 '70378, 70437',
 '70376',
 '70329',
 '70599',
 '70619',
 '70439',
 '70327',
 '70563, 70565, 70569',
 '70327',
 '70499',
 '70435, 70437, 70439']

Create pandas Dataframe:

- Every row one postal code

- For districst with more than one postal code, we will combine the name with the postal code and save it as new name (e.g. Stuttgart-Mitte_70173)

In [6]:
df_list = []

for idx, code in enumerate(postal_codes_list):
    splitted_code = code.split(",")
    
    if len(splitted_code) > 1:
        for idx_c, c in enumerate(splitted_code):
            df_row = {}
            c_without_spaces = c.replace(" ", "")
            df_row['District'] = f"{stuttgart_districts_names[idx]}_{c_without_spaces}"
            df_row['PostalCode'] = c_without_spaces
            df_list.append(df_row)
    else:
        df_row = {}
        df_row['District'] = stuttgart_districts_names[idx]
        df_row['PostalCode'] = code
        df_list.append(df_row)
    
df = pd.DataFrame(df_list)
df

Unnamed: 0,District,PostalCode
0,Stuttgart-Mitte_70173,70173
1,Stuttgart-Mitte_70174,70174
2,Stuttgart-Mitte_70178,70178
3,Stuttgart-Mitte_70182,70182
4,Stuttgart-Nord_70174,70174
5,Stuttgart-Nord_70191,70191
6,Stuttgart-Nord_70192,70192
7,Stuttgart-Ost_70184,70184
8,Stuttgart-Ost_70186,70186
9,Stuttgart-Ost_70188,70188


Check if postal codes appear in more than one districts

In [7]:
df['PostalCode'].value_counts()

70327    3
70178    3
70437    2
70565    2
70376    2
70184    2
70569    2
70439    2
70499    2
70174    2
70599    2
70329    2
70378    2
70173    1
70567    1
70191    1
70190    1
70469    1
70193    1
70180    1
70332    1
70197    1
70186    1
70195    1
70182    1
70619    1
70199    1
70597    1
70188    1
70374    1
70435    1
70192    1
70176    1
70563    1
70372    1
Name: PostalCode, dtype: int64

Check which districts share the same postal code and combine their names:

In [8]:
df[df['PostalCode'] == '70178']

Unnamed: 0,District,PostalCode
2,Stuttgart-Mitte_70178,70178
12,Stuttgart-Süd_70178,70178
18,Stuttgart-West_70178,70178


In [9]:
new_row = {}

new_row['District'] = "Stuttgart-Mitte-Süd-West_70178"
new_row['PostalCode'] = "70178"

df = df.append(new_row, ignore_index=True)
df.drop([2, 12, 18], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70178']

Unnamed: 0,District,PostalCode
47,Stuttgart-Mitte-Süd-West_70178,70178


In [10]:
df[df['PostalCode'] == '70327']

Unnamed: 0,District,PostalCode
10,Stuttgart-Ost_70327,70327
38,Untertürkheim,70327
42,Wangen,70327


In [11]:
new_row = {}

new_row['District'] = "Stuttgart-Ost-Untertürkheim-Wangen_70327"
new_row['PostalCode'] = "70327"

df = df.append(new_row, ignore_index=True)
df.drop([10, 38, 42], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70327']

Unnamed: 0,District,PostalCode
45,Stuttgart-Ost-Untertürkheim-Wangen_70327,70327


In [12]:
df[df['PostalCode'] == '70329']

Unnamed: 0,District,PostalCode
27,Hedelfingen,70329
33,Obertürkheim,70329


In [13]:
new_row['District'] = "Stuttgart-Hedelfingen-Obertürkheim_70329"
new_row['PostalCode'] = "70329"

df = df.append(new_row, ignore_index=True)
df.drop([27, 33], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70329']

Unnamed: 0,District,PostalCode
44,Stuttgart-Hedelfingen-Obertürkheim_70329,70329


In [14]:
df[df['PostalCode'] == '70376']

Unnamed: 0,District,PostalCode
20,Bad_Cannstatt_70376,70376
31,Münster,70376


In [15]:
new_row['District'] = "Stuttgart-Bad_Cannstatt-Münster_70376"
new_row['PostalCode'] = "70376"

df = df.append(new_row, ignore_index=True)
df.drop([20, 31], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70376']

Unnamed: 0,District,PostalCode
43,Stuttgart-Bad_Cannstatt-Münster_70376,70376


In [16]:
df[df['PostalCode'] == '70439']

Unnamed: 0,District,PostalCode
32,Stammheim,70439
39,Zuffenhausen_70439,70439


In [17]:
new_row['District'] = "Stuttgart-Stammheim-Zuffenhausen_70439"
new_row['PostalCode'] = "70439"

df = df.append(new_row, ignore_index=True)
df.drop([32, 39], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70439']

Unnamed: 0,District,PostalCode
42,Stuttgart-Stammheim-Zuffenhausen_70439,70439


In [18]:
df[df['PostalCode'] == '70437']

Unnamed: 0,District,PostalCode
29,Mühlhausen_70437,70437
37,Zuffenhausen_70437,70437


In [19]:
new_row['District'] = "Stuttgart-Mühlhausen-Zuffenhausen_70437"
new_row['PostalCode'] = "70437"

df = df.append(new_row, ignore_index=True)
df.drop([29, 37], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70437']

Unnamed: 0,District,PostalCode
41,Stuttgart-Mühlhausen-Zuffenhausen_70437,70437


In [20]:
df[df['PostalCode'] == '70569']

Unnamed: 0,District,PostalCode
13,Stuttgart-Süd_70569,70569
33,Vaihingen_70569,70569


In [21]:
new_row['District'] = "Stuttgart-Süd-Vaihingen_70569"
new_row['PostalCode'] = "70569"

df = df.append(new_row, ignore_index=True)
df.drop([13, 33], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70569']

Unnamed: 0,District,PostalCode
40,Stuttgart-Süd-Vaihingen_70569,70569


In [22]:
df[df['PostalCode'] == '70599']

Unnamed: 0,District,PostalCode
20,Birkach,70599
28,Plieningen,70599


In [23]:
new_row['District'] = "Stuttgart-Birkach-Plieningen_70599"
new_row['PostalCode'] = "70599"

df = df.append(new_row, ignore_index=True)
df.drop([20, 28], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70599']

Unnamed: 0,District,PostalCode
39,Stuttgart-Birkach-Plieningen_70599,70599


In [24]:
df[df['PostalCode'] == '70378']

Unnamed: 0,District,PostalCode
19,Bad_Cannstatt_70378,70378
26,Mühlhausen_70378,70378


In [25]:
new_row['District'] = "Stuttgart-Bad_Cannstatt-Mühlhausen_70378"
new_row['PostalCode'] = "70378"

df = df.append(new_row, ignore_index=True)
df.drop([19, 26], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70378']

Unnamed: 0,District,PostalCode
38,Stuttgart-Bad_Cannstatt-Mühlhausen_70378,70378


In [26]:
df[df['PostalCode'] == '70565']

Unnamed: 0,District,PostalCode
24,Möhringen_70565,70565
27,Vaihingen_70565,70565


In [27]:
new_row['District'] = "Stuttgart-Möhringen-Vaihingen_70565"
new_row['PostalCode'] = "70565"

df = df.append(new_row, ignore_index=True)
df.drop([24, 27], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70565']

Unnamed: 0,District,PostalCode
37,Stuttgart-Möhringen-Vaihingen_70565,70565


In [28]:
df[df['PostalCode'] == '70174']

Unnamed: 0,District,PostalCode
1,Stuttgart-Mitte_70174,70174
3,Stuttgart-Nord_70174,70174


In [29]:
new_row['District'] = "Stuttgart-Mitte-Nord_70174"
new_row['PostalCode'] = "70174"

df = df.append(new_row, ignore_index=True)
df.drop([1, 3], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70174']

Unnamed: 0,District,PostalCode
36,Stuttgart-Mitte-Nord_70174,70174


In [30]:
df[df['PostalCode'] == '70184']

Unnamed: 0,District,PostalCode
4,Stuttgart-Ost_70184,70184
9,Stuttgart-Süd_70184,70184


In [31]:
new_row['District'] = "Stuttgart-Ost-Süd_70184"
new_row['PostalCode'] = "70184"

df = df.append(new_row, ignore_index=True)
df.drop([4, 9], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70184']

Unnamed: 0,District,PostalCode
35,Stuttgart-Ost-Süd_70184,70184


In [32]:
df[df['PostalCode'] == '70499']

Unnamed: 0,District,PostalCode
18,Feuerbach_70499,70499
22,Weilimdorf,70499


In [33]:
new_row['District'] = "Stuttgart-Feuerbach-Weilimdorf_70499"
new_row['PostalCode'] = "70499"

df = df.append(new_row, ignore_index=True)
df.drop([18, 22], inplace=True, axis=0)
df.reset_index(inplace=True, drop=True)
df[df['PostalCode'] == '70499']

Unnamed: 0,District,PostalCode
34,Stuttgart-Feuerbach-Weilimdorf_70499,70499


In [34]:
df['PostalCode'].value_counts()

70174    1
70191    1
70186    1
70199    1
70332    1
70184    1
70327    1
70173    1
70372    1
70567    1
70190    1
70437    1
70599    1
70193    1
70499    1
70180    1
70469    1
70619    1
70376    1
70192    1
70563    1
70569    1
70378    1
70176    1
70439    1
70182    1
70435    1
70197    1
70374    1
70188    1
70597    1
70195    1
70565    1
70329    1
70178    1
Name: PostalCode, dtype: int64

In [35]:
df

Unnamed: 0,District,PostalCode
0,Stuttgart-Mitte_70173,70173
1,Stuttgart-Mitte_70182,70182
2,Stuttgart-Nord_70191,70191
3,Stuttgart-Nord_70192,70192
4,Stuttgart-Ost_70186,70186
5,Stuttgart-Ost_70188,70188
6,Stuttgart-Ost_70190,70190
7,Stuttgart-Süd_70180,70180
8,Stuttgart-Süd_70199,70199
9,Stuttgart-West_70176,70176


# Add coordinates (latitude, longitude) to the dataframe

Use the while loop described on coursera to make sure that we get the coordinates for all of the neighborhoods, since this package has a little issue regarding the coordinates:

In [36]:
coordinates_list = []

for idx, row in df.iterrows():
    
    coordinates_dict = {}
    # initialize your variables to None
    lat = None
    lon = None

    # loop until you get the coordinates
    while(lat is None and lon is None):
        nom = Nominatim(user_agent="stuttgart_agent")
        g = nom.geocode(f"{row['PostalCode']}, Stuttgart, Germany")
        lat = g.latitude
        lon = g.longitude

    coordinates_dict['PostalCode'] = row['PostalCode']
    coordinates_dict['Latitude'] = lat
    coordinates_dict['Longitude'] = lon
    coordinates_list.append(coordinates_dict)

    print(f"row {idx} completed")

df_coordinates = pd.DataFrame(coordinates_list)
print(df_coordinates.shape)
df_coordinates.head()

row 0 completed
row 1 completed
row 2 completed
row 3 completed
row 4 completed
row 5 completed
row 6 completed
row 7 completed
row 8 completed
row 9 completed
row 10 completed
row 11 completed
row 12 completed
row 13 completed
row 14 completed
row 15 completed
row 16 completed
row 17 completed
row 18 completed
row 19 completed
row 20 completed
row 21 completed
row 22 completed
row 23 completed
row 24 completed
row 25 completed
row 26 completed
row 27 completed
row 28 completed
row 29 completed
row 30 completed
row 31 completed
row 32 completed
row 33 completed
row 34 completed
(35, 3)


Unnamed: 0,PostalCode,Latitude,Longitude
0,70173,48.777808,9.17842
1,70182,48.774376,9.184793
2,70191,48.798102,9.182559
3,70192,48.795235,9.165006
4,70186,48.775952,9.207278


Use pandas with "inner join" on Postal Code to join both dataframes:

In [37]:
df = df.merge(df_coordinates, how='inner', left_on='PostalCode', right_on='PostalCode')
df

Unnamed: 0,District,PostalCode,Latitude,Longitude
0,Stuttgart-Mitte_70173,70173,48.777808,9.17842
1,Stuttgart-Mitte_70182,70182,48.774376,9.184793
2,Stuttgart-Nord_70191,70191,48.798102,9.182559
3,Stuttgart-Nord_70192,70192,48.795235,9.165006
4,Stuttgart-Ost_70186,70186,48.775952,9.207278
5,Stuttgart-Ost_70188,70188,48.78436,9.208555
6,Stuttgart-Ost_70190,70190,48.789475,9.202558
7,Stuttgart-Süd_70180,70180,48.763985,9.174787
8,Stuttgart-Süd_70199,70199,48.75968,9.153594
9,Stuttgart-West_70176,70176,48.77743,9.161035


In [38]:
print(df.shape)
df.isna().sum()

(35, 4)


District      0
PostalCode    0
Latitude      0
Longitude     0
dtype: int64

Now we have our dataframe :-)

# Explore and cluster districts

Use geopy library to get the latitude and longitude values of Stuttgart.

In [39]:
address = 'Stuttgart, Germany'

geolocator = Nominatim(user_agent="stuttgart_explorer")
location = geolocator.geocode(address)
latitude_stuttgart = location.latitude
longitude_stuttgart = location.longitude
print('The geographical coordinate of Stuttgart are {}, {}.'.format(latitude_stuttgart, longitude_stuttgart))

The geographical coordinate of Stuttgart are 48.7784485, 9.1800132.


In [40]:
map_stuttgart = folium.Map(location=[latitude_stuttgart, longitude_stuttgart], zoom_start=12)

# add markers to map
for lat, lng, district, postalcode in zip(df['Latitude'], df['Longitude'], df['District'], df['PostalCode']):
    label = '{}, {}'.format(district, postalcode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_stuttgart)  
    
map_stuttgart

Define Foursquare Credentials and Version:

In [41]:
CLIENT_ID = 'X0V0U4AT1X3V5YIZ4G0VTZHTCFLRV133WQQKKL20UBFS51GX' # your Foursquare ID
CLIENT_SECRET = 'TFXJYZ1ZZLYWT3IX4CIQN2T4NPBUGZFQAQ3OEK5BTTX5CU1G' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: X0V0U4AT1X3V5YIZ4G0VTZHTCFLRV133WQQKKL20UBFS51GX
CLIENT_SECRET:TFXJYZ1ZZLYWT3IX4CIQN2T4NPBUGZFQAQ3OEK5BTTX5CU1G


For testing purpose let's explore the first neighborhood in the stuttgart dataframe

Get the name, latitude and longitude of the first neighboorhood

In [42]:
district_latitude = df.loc[0, 'Latitude'] # neighborhood latitude value
district_longitude = df.loc[0, 'Longitude'] # neighborhood longitude value

district_name = df.loc[0, 'District'] # neighborhood name

print('Latitude and longitude values of {} are {}, {}.'.format(district_name, 
                                                               district_latitude, 
                                                               district_longitude))

Latitude and longitude values of Stuttgart-Mitte_70173 are 48.77780796305648, 9.178419500470174.


Let's get the top 100 venues for Stuttgart Mitte:

In [43]:
radius = 500 # define radius

url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    district_latitude, 
    district_longitude, 
    radius, 
    LIMIT)

url

'https://api.foursquare.com/v2/venues/explore?&client_id=X0V0U4AT1X3V5YIZ4G0VTZHTCFLRV133WQQKKL20UBFS51GX&client_secret=TFXJYZ1ZZLYWT3IX4CIQN2T4NPBUGZFQAQ3OEK5BTTX5CU1G&v=20180605&ll=48.77780796305648,9.178419500470174&radius=500&limit=100'

In [44]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '60e18ce14b32090048daf3a0'},
 'response': {'suggestedFilters': {'header': 'Tap to show:',
   'filters': [{'name': 'Open now', 'key': 'openNow'}]},
  'headerLocation': 'Rathaus',
  'headerFullLocation': 'Rathaus, Stuttgart',
  'headerLocationGranularity': 'neighborhood',
  'totalResults': 100,
  'suggestedBounds': {'ne': {'lat': 48.782307967556484,
    'lng': 9.18523548389474},
   'sw': {'lat': 48.77330795855648, 'lng': 9.171603517045607}},
  'groups': [{'type': 'Recommended Places',
    'name': 'recommended',
    'items': [{'reasons': {'count': 0,
       'items': [{'summary': 'This spot is popular',
         'type': 'general',
         'reasonName': 'globalInteractionReason'}]},
      'venue': {'id': '4b9d1894f964a5203a8f36e3',
       'name': 'Schlossplatz',
       'location': {'address': 'Schlossplatz',
        'lat': 48.77854905890561,
        'lng': 9.179854989051819,
        'labeledLatLngs': [{'label': 'display',
          'lat': 48.778549058905

Use the get_category_type function from the Foursquare lab:

In [45]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [46]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues = nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

  nearby_venues = json_normalize(venues) # flatten JSON


Unnamed: 0,name,categories,lat,lng
0,Schlossplatz,Plaza,48.778549,9.179855
1,Feinkost Böhm,Gourmet Shop,48.778077,9.176317
2,Kleiner Schlossplatz,Plaza,48.778394,9.17686
3,bungalow,Men's Store,48.776175,9.17797
4,Old Bridge,Ice Cream Shop,48.780007,9.177899


In [47]:
print('{} venues were returned by Foursquare.'.format(nearby_venues.shape[0]))

100 venues were returned by Foursquare.


### Explore districts in Stuttgart

We will use the function from the New York lab to repeat the same process to all the districts in Stuttgart:

In [48]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()['response']['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['District', 
                  'District Latitude', 
                  'District Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

Run the above function on each neighborhood and create a new dataframe:

In [49]:
stuttgart_venues = getNearbyVenues(
    names=df['District'],
    latitudes=df['Latitude'],
    longitudes=df['Longitude'])

Stuttgart-Mitte_70173
Stuttgart-Mitte_70182
Stuttgart-Nord_70191
Stuttgart-Nord_70192
Stuttgart-Ost_70186
Stuttgart-Ost_70188
Stuttgart-Ost_70190
Stuttgart-Süd_70180
Stuttgart-Süd_70199
Stuttgart-West_70176
Stuttgart-West_70193
Stuttgart-West_70197
Bad_Cannstatt_70332
Bad_Cannstatt_70372
Bad_Cannstatt_70374
Botnang
Degerloch
Feuerbach_70469
Möhringen_70567
Sillenbuch
Vaihingen_70563
Zuffenhausen_70435
Stuttgart-Mitte-Süd-West_70178
Stuttgart-Ost-Untertürkheim-Wangen_70327
Stuttgart-Hedelfingen-Obertürkheim_70329
Stuttgart-Bad_Cannstatt-Münster_70376
Stuttgart-Stammheim-Zuffenhausen_70439
Stuttgart-Mühlhausen-Zuffenhausen_70437
Stuttgart-Süd-Vaihingen_70569
Stuttgart-Birkach-Plieningen_70599
Stuttgart-Bad_Cannstatt-Mühlhausen_70378
Stuttgart-Möhringen-Vaihingen_70565
Stuttgart-Mitte-Nord_70174
Stuttgart-Ost-Süd_70184
Stuttgart-Feuerbach-Weilimdorf_70499


Let's check the size and head of the resulting dataframe:

In [50]:
print(stuttgart_venues.shape)
stuttgart_venues.head()

(719, 7)


Unnamed: 0,District,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Stuttgart-Mitte_70173,48.777808,9.17842,Schlossplatz,48.778549,9.179855,Plaza
1,Stuttgart-Mitte_70173,48.777808,9.17842,Feinkost Böhm,48.778077,9.176317,Gourmet Shop
2,Stuttgart-Mitte_70173,48.777808,9.17842,Kleiner Schlossplatz,48.778394,9.17686,Plaza
3,Stuttgart-Mitte_70173,48.777808,9.17842,bungalow,48.776175,9.17797,Men's Store
4,Stuttgart-Mitte_70173,48.777808,9.17842,Old Bridge,48.780007,9.177899,Ice Cream Shop


Let's check how many venues were returned for each district

In [51]:
stuttgart_venues.groupby('District').count()

Unnamed: 0_level_0,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bad_Cannstatt_70332,80,80,80,80,80,80
Bad_Cannstatt_70372,36,36,36,36,36,36
Bad_Cannstatt_70374,3,3,3,3,3,3
Botnang,4,4,4,4,4,4
Degerloch,14,14,14,14,14,14
Feuerbach_70469,16,16,16,16,16,16
Möhringen_70567,14,14,14,14,14,14
Sillenbuch,10,10,10,10,10,10
Stuttgart-Bad_Cannstatt-Mühlhausen_70378,2,2,2,2,2,2
Stuttgart-Bad_Cannstatt-Münster_70376,13,13,13,13,13,13


Let's find out how many unique categories can be curated from all the returned venues

In [52]:
print('There are {} uniques categories.'.format(len(stuttgart_venues['Venue Category'].unique())))

There are 148 uniques categories.


Create a new dataframe where the venue categories are seperated via one hot encoding:

In [53]:
# one hot encoding
stuttgart_onehot = pd.get_dummies(stuttgart_venues[['Venue Category']], prefix="", prefix_sep="")
stuttgart_onehot.head()

Unnamed: 0,African Restaurant,American Restaurant,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Beach Bar,Beer Garden,Beer Store,Big Box Store,Bistro,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Station,Bus Stop,Business Service,Café,Camera Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Doner Restaurant,Donut Shop,Drugstore,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Food Court,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Gastropub,Gay Bar,General Entertainment,German Pop-Up Restaurant,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Insurance Office,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Kebab Restaurant,Korean Restaurant,Lounge,Market,Massage Studio,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Photography Studio,Pizza Place,Playground,Plaza,Pool,Pool Hall,Pub,Record Shop,Restaurant,Rock Club,Sandwich Place,Sauna / Steam Room,Scenic Lookout,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soup Place,South Indian Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Swabian Restaurant,Taverna,Tennis Stadium,Thai Restaurant,Theater,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Winery,Yoga Studio
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Let's insert the district name from the stuttgart_venues dataframe and set it as our first column.

In [54]:
stuttgart_onehot.insert(0, 'District', stuttgart_venues['District'])
stuttgart_onehot.head()

Unnamed: 0,District,African Restaurant,American Restaurant,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Beach Bar,Beer Garden,Beer Store,Big Box Store,Bistro,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Station,Bus Stop,Business Service,Café,Camera Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Doner Restaurant,Donut Shop,Drugstore,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Food Court,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Gastropub,Gay Bar,General Entertainment,German Pop-Up Restaurant,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Insurance Office,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Kebab Restaurant,Korean Restaurant,Lounge,Market,Massage Studio,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Photography Studio,Pizza Place,Playground,Plaza,Pool,Pool Hall,Pub,Record Shop,Restaurant,Rock Club,Sandwich Place,Sauna / Steam Room,Scenic Lookout,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soup Place,South Indian Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Swabian Restaurant,Taverna,Tennis Stadium,Thai Restaurant,Theater,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Winery,Yoga Studio
0,Stuttgart-Mitte_70173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Stuttgart-Mitte_70173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Stuttgart-Mitte_70173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Stuttgart-Mitte_70173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Stuttgart-Mitte_70173,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Let's check the size of our new dataframe:

In [55]:
stuttgart_onehot.shape

(719, 149)

Group rows by district name and take the mean of the frequency of occurence of each category:

In [56]:
stuttgart_grouped = stuttgart_onehot.groupby('District').mean().reset_index()
stuttgart_grouped

Unnamed: 0,District,African Restaurant,American Restaurant,Art Museum,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bank,Bar,Beach Bar,Beer Garden,Beer Store,Big Box Store,Bistro,Bookstore,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Bus Station,Bus Stop,Business Service,Café,Camera Store,Chinese Restaurant,Clothing Store,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Construction & Landscaping,Cupcake Shop,Deli / Bodega,Department Store,Dessert Shop,Diner,Discount Store,Doner Restaurant,Donut Shop,Drugstore,Electronics Store,Event Space,Falafel Restaurant,Farm,Farmers Market,Fast Food Restaurant,Flower Shop,Food & Drink Shop,Food Court,Fountain,French Restaurant,Fried Chicken Joint,Fruit & Vegetable Store,Furniture / Home Store,Garden,Gastropub,Gay Bar,General Entertainment,German Pop-Up Restaurant,German Restaurant,Gift Shop,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Historic Site,History Museum,Hookah Bar,Hostel,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Insurance Office,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Kebab Restaurant,Korean Restaurant,Lounge,Market,Massage Studio,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Movie Theater,Multiplex,Museum,Music Venue,Neighborhood,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store,Park,Pedestrian Plaza,Performing Arts Venue,Persian Restaurant,Photography Studio,Pizza Place,Playground,Plaza,Pool,Pool Hall,Pub,Record Shop,Restaurant,Rock Club,Sandwich Place,Sauna / Steam Room,Scenic Lookout,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Smoke Shop,Snack Place,Soup Place,South Indian Restaurant,Souvenir Shop,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Swabian Restaurant,Taverna,Tennis Stadium,Thai Restaurant,Theater,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Winery,Yoga Studio
0,Bad_Cannstatt_70332,0.0,0.0,0.0125,0.0125,0.0,0.0,0.0,0.0,0.0375,0.0125,0.0125,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0,0.0125,0.0125,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0375,0.0125,0.025,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0,0.075,0.0,0.025,0.0,0.0,0.0,0.0,0.0,0.0375,0.0,0.0,0.025,0.0,0.025,0.0,0.0,0.0,0.025,0.0125,0.0,0.0,0.0125,0.0125,0.0125,0.0,0.0125,0.0,0.0,0.0,0.025,0.0,0.0125,0.0125,0.0,0.0,0.0125,0.0,0.0125,0.0,0.025,0.0125,0.0125,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.025,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0,0.025,0.0375,0.0,0.0125,0.0,0.025,0.025,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0
1,Bad_Cannstatt_70372,0.0,0.0,0.0,0.027778,0.0,0.0,0.055556,0.0,0.055556,0.0,0.027778,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.027778,0.0,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.055556,0.0,0.0,0.027778,0.027778,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.027778,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0
2,Bad_Cannstatt_70374,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Botnang,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Degerloch,0.0,0.071429,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Feuerbach_70469,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1875,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Möhringen_70567,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,0.0
7,Sillenbuch,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
8,Stuttgart-Bad_Cannstatt-Mühlhausen_70378,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Stuttgart-Bad_Cannstatt-Münster_70376,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.153846,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [57]:
stuttgart_grouped.shape

(35, 149)

Print each neighborhood with the top 5 most common venues:

In [58]:
num_top_venues = 5

for district in stuttgart_grouped['District']:
    print("----"+district+"----")
    temp = stuttgart_grouped[stuttgart_grouped['District'] == district].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Bad_Cannstatt_70332----
               venue  freq
0  German Restaurant  0.08
1              Plaza  0.06
2               Café  0.05
3                Bar  0.04
4   Sushi Restaurant  0.04


----Bad_Cannstatt_70372----
              venue  freq
0         Drugstore  0.08
1              Café  0.08
2            Bakery  0.06
3               Bar  0.06
4  Greek Restaurant  0.06


----Bad_Cannstatt_70374----
                venue  freq
0         Pizza Place  0.33
1       Metro Station  0.33
2             Taverna  0.33
3  African Restaurant  0.00
4                Park  0.00


----Botnang----
                venue  freq
0         Supermarket  0.50
1              Bakery  0.25
2      Farmers Market  0.25
3  African Restaurant  0.00
4    Pedestrian Plaza  0.00


----Degerloch----
                 venue  freq
0          Supermarket  0.14
1               Bakery  0.14
2       Ice Cream Shop  0.07
3  American Restaurant  0.07
4        Grocery Store  0.07


----Feuerbach_70469----
                venu

Write the results into a pandas dataframe.

We will use the function of the New York lab to sort the venues in descending order:

In [59]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

Create the new dataframe with the top 10 venues for each district:

In [60]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['District'] = stuttgart_grouped['District']

for ind in np.arange(stuttgart_grouped.shape[0]):
    district_venues_sorted.iloc[ind, 1:] = return_most_common_venues(stuttgart_grouped.iloc[ind, :], num_top_venues)

district_venues_sorted.head()

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bad_Cannstatt_70332,German Restaurant,Plaza,Café,Bar,Sushi Restaurant,History Museum,Clothing Store,Restaurant,Supermarket,Coffee Shop
1,Bad_Cannstatt_70372,Drugstore,Café,Bakery,Bar,Greek Restaurant,Metro Station,Hotel,Taverna,German Restaurant,Cupcake Shop
2,Bad_Cannstatt_70374,Pizza Place,Metro Station,Taverna,African Restaurant,Park,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store
3,Botnang,Supermarket,Bakery,Farmers Market,African Restaurant,Pedestrian Plaza,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store
4,Degerloch,Supermarket,Bakery,Ice Cream Shop,American Restaurant,Grocery Store,German Restaurant,French Restaurant,Playground,Drugstore,Shop & Service


### Cluster Districts

We will use k-means cluster to cluster the districts into 3 clusters:

In [67]:
# set number of clusters
kclusters = 3

stuttgart_grouped_clustering = stuttgart_grouped.drop('District', axis=1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(stuttgart_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([1, 1, 2, 1, 1, 1, 1, 1, 0, 1])

Create a new dataframe that includes the cluster and the top 10 venues for each neighborhood:

In [68]:
# add clustering labels
district_venues_sorted.insert(0, 'Cluster Labels' kmeans.labels_)

stuttgart_merged = df

# merge stuttgart_grouped with df to add latitude/longitude for each neighborhood
stuttgart_merged = stuttgart_merged.merge(district_venues_sorted.set_index('District'), how='inner', left_on='District', right_on='District')

stuttgart_merged.head() # check the last columns!

Unnamed: 0,District,PostalCode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Stuttgart-Mitte_70173,70173,48.777808,9.17842,1,German Restaurant,Café,Bar,Plaza,Coffee Shop,Sushi Restaurant,Clothing Store,Italian Restaurant,Ice Cream Shop,Boutique
1,Stuttgart-Mitte_70182,70182,48.774376,9.184793,1,Café,German Restaurant,Hotel,Plaza,African Restaurant,Restaurant,History Museum,Italian Restaurant,Japanese Restaurant,Jazz Club
2,Stuttgart-Nord_70191,70191,48.798102,9.182559,1,Museum,German Restaurant,Beer Garden,Neighborhood,Park,Nightclub,Opera House,Organic Grocery,Pool Hall,Palace
3,Stuttgart-Nord_70192,70192,48.795235,9.165006,1,Stadium,Thai Restaurant,Athletics & Sports,French Restaurant,Tennis Stadium,Museum,Neighborhood,Nightclub,Opera House,Organic Grocery
4,Stuttgart-Ost_70186,70186,48.775952,9.207278,1,Bakery,Bus Stop,Grocery Store,Italian Restaurant,Bar,Trattoria/Osteria,Nightclub,Opera House,Organic Grocery,Palace


Visualize the resulting clusters:

In [69]:
# create map
map_clusters = folium.Map(location=[latitude_stuttgart, longitude_stuttgart], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, dist, cluster in zip(stuttgart_merged['Latitude'], stuttgart_merged['Longitude'], stuttgart_merged['District'], stuttgart_merged['Cluster Labels']):
    label = folium.Popup(str(dist) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

Since the result with 3 clusters is not really satifying, we will use 6 clusters and see if we get a better result

In [70]:
# set number of clusters
kclusters = 6

stuttgart_grouped_clustering = stuttgart_grouped.drop('District', axis=1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0)
kmeans.fit(stuttgart_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 3, 5, 4, 3, 3, 3, 3, 1, 3])

Create a new dataframe that includes the cluster and the top 10 venues for each neighborhood:

In [71]:
# add clustering labels
district_venues_sorted['Cluster Labels'] = kmeans.labels_

stuttgart_merged = df

# merge stuttgart_grouped with df to add latitude/longitude for each neighborhood
stuttgart_merged = stuttgart_merged.merge(district_venues_sorted.set_index('District'), how='inner', left_on='District', right_on='District')

stuttgart_merged.head() # check the last columns!

Unnamed: 0,District,PostalCode,Latitude,Longitude,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Stuttgart-Mitte_70173,70173,48.777808,9.17842,0,German Restaurant,Café,Bar,Plaza,Coffee Shop,Sushi Restaurant,Clothing Store,Italian Restaurant,Ice Cream Shop,Boutique
1,Stuttgart-Mitte_70182,70182,48.774376,9.184793,0,Café,German Restaurant,Hotel,Plaza,African Restaurant,Restaurant,History Museum,Italian Restaurant,Japanese Restaurant,Jazz Club
2,Stuttgart-Nord_70191,70191,48.798102,9.182559,0,Museum,German Restaurant,Beer Garden,Neighborhood,Park,Nightclub,Opera House,Organic Grocery,Pool Hall,Palace
3,Stuttgart-Nord_70192,70192,48.795235,9.165006,0,Stadium,Thai Restaurant,Athletics & Sports,French Restaurant,Tennis Stadium,Museum,Neighborhood,Nightclub,Opera House,Organic Grocery
4,Stuttgart-Ost_70186,70186,48.775952,9.207278,3,Bakery,Bus Stop,Grocery Store,Italian Restaurant,Bar,Trattoria/Osteria,Nightclub,Opera House,Organic Grocery,Palace


Visualize the resulting clusters:

In [72]:
# create map
map_clusters = folium.Map(location=[latitude_stuttgart, longitude_stuttgart], zoom_start=12)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, dist, cluster in zip(stuttgart_merged['Latitude'], stuttgart_merged['Longitude'], stuttgart_merged['District'], stuttgart_merged['Cluster Labels']):
    label = folium.Popup(str(dist) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

The result with 6 clusters looks a lot better. We will go ahead and examine the clusters.

### Examine clusters

Cluster 1:

It looks like there are a lot of venues here belong to the categories coffee shop, café and some restaurants. We will call this cluster "Café/Restaurant".

In [73]:
stuttgart_merged.loc[stuttgart_merged['Cluster Labels'] == 0, stuttgart_merged.columns[[0] + list(range(5, stuttgart_merged.shape[1]))]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Stuttgart-Mitte_70173,German Restaurant,Café,Bar,Plaza,Coffee Shop,Sushi Restaurant,Clothing Store,Italian Restaurant,Ice Cream Shop,Boutique
1,Stuttgart-Mitte_70182,Café,German Restaurant,Hotel,Plaza,African Restaurant,Restaurant,History Museum,Italian Restaurant,Japanese Restaurant,Jazz Club
2,Stuttgart-Nord_70191,Museum,German Restaurant,Beer Garden,Neighborhood,Park,Nightclub,Opera House,Organic Grocery,Pool Hall,Palace
3,Stuttgart-Nord_70192,Stadium,Thai Restaurant,Athletics & Sports,French Restaurant,Tennis Stadium,Museum,Neighborhood,Nightclub,Opera House,Organic Grocery
6,Stuttgart-Ost_70190,Hotel,Café,Italian Restaurant,Metro Station,Fast Food Restaurant,Comfort Food Restaurant,Smoke Shop,Park,Pub,Burger Joint
7,Stuttgart-Süd_70180,Café,Bar,Vietnamese Restaurant,Supermarket,Restaurant,Italian Restaurant,African Restaurant,Organic Grocery,Drugstore,French Restaurant
8,Stuttgart-Süd_70199,Bakery,German Restaurant,Business Service,French Restaurant,Bus Stop,Indian Restaurant,General Entertainment,Plaza,Market,Brewery
9,Stuttgart-West_70176,Café,Italian Restaurant,Supermarket,Playground,Bakery,Bar,Pub,Drugstore,Coffee Shop,South Indian Restaurant
10,Stuttgart-West_70193,Playground,South Indian Restaurant,Scenic Lookout,German Restaurant,African Restaurant,Park,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store
11,Stuttgart-West_70197,Supermarket,Dessert Shop,Spanish Restaurant,German Restaurant,Gym / Fitness Center,Japanese Restaurant,Grocery Store,Playground,Italian Restaurant,Food & Drink Shop


Cluster 2:

In this cluster there is only one district, we will call this cluster by the district name "Bad Cannstatt".

In [75]:
stuttgart_merged.loc[stuttgart_merged['Cluster Labels'] == 1, stuttgart_merged.columns[[0] + list(range(5, stuttgart_merged.shape[1]))]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
30,Stuttgart-Bad_Cannstatt-Mühlhausen_70378,Historic Site,Photography Studio,African Restaurant,Pedestrian Plaza,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store,Park


Cluster 3:

In this cluster there is only one district, we will call this cluster by the district name "Stuttgart Ost".

In [76]:
stuttgart_merged.loc[stuttgart_merged['Cluster Labels'] == 2, stuttgart_merged.columns[[0] + list(range(5, stuttgart_merged.shape[1]))]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
33,Stuttgart-Ost-Süd_70184,Metro Station,Flower Shop,Greek Restaurant,African Restaurant,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store,Park,Pedestrian Plaza


Cluster 4:

In this cluster there are a lot of different venues, we will call it "Mixed".

In [78]:
stuttgart_merged.loc[stuttgart_merged['Cluster Labels'] == 3, stuttgart_merged.columns[[0] + list(range(5, stuttgart_merged.shape[1]))]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
4,Stuttgart-Ost_70186,Bakery,Bus Stop,Grocery Store,Italian Restaurant,Bar,Trattoria/Osteria,Nightclub,Opera House,Organic Grocery,Palace
5,Stuttgart-Ost_70188,Supermarket,Ice Cream Shop,Bakery,Park,Metro Station,Gym,Drugstore,Pool,Diner,Food & Drink Shop
13,Bad_Cannstatt_70372,Drugstore,Café,Bakery,Bar,Greek Restaurant,Metro Station,Hotel,Taverna,German Restaurant,Cupcake Shop
16,Degerloch,Supermarket,Bakery,Ice Cream Shop,American Restaurant,Grocery Store,German Restaurant,French Restaurant,Playground,Drugstore,Shop & Service
17,Feuerbach_70469,Hotel,Restaurant,Italian Restaurant,Metro Station,Dessert Shop,Drugstore,Supermarket,Middle Eastern Restaurant,Greek Restaurant,Pub
18,Möhringen_70567,Hotel,Bakery,Middle Eastern Restaurant,Restaurant,Farm,Trattoria/Osteria,Supermarket,Café,Metro Station,Greek Restaurant
19,Sillenbuch,Yoga Studio,Gym,Farmers Market,Shopping Mall,Chinese Restaurant,Supermarket,Middle Eastern Restaurant,Metro Station,Drugstore,Bank
21,Zuffenhausen_70435,Burger Joint,Restaurant,Drugstore,Ice Cream Shop,Japanese Restaurant,Steakhouse,Breakfast Spot,Supermarket,German Pop-Up Restaurant,Shipping Store
23,Stuttgart-Ost-Untertürkheim-Wangen_70327,Bakery,Doner Restaurant,Grocery Store,Gastropub,Wine Shop,Pool,German Pop-Up Restaurant,German Restaurant,Metro Station,Greek Restaurant
25,Stuttgart-Bad_Cannstatt-Münster_70376,Bakery,Supermarket,Metro Station,Discount Store,Italian Restaurant,Hotel,Restaurant,German Restaurant,Theater,Middle Eastern Restaurant


Cluster 5:

In this cluster there are also a lot of different venues, we will call it "Mixed", too.

In [79]:
stuttgart_merged.loc[stuttgart_merged['Cluster Labels'] == 4, stuttgart_merged.columns[[0] + list(range(5, stuttgart_merged.shape[1]))]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
15,Botnang,Supermarket,Bakery,Farmers Market,African Restaurant,Pedestrian Plaza,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store
24,Stuttgart-Hedelfingen-Obertürkheim_70329,Supermarket,Fast Food Restaurant,Lounge,African Restaurant,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store,Park,Pedestrian Plaza


Cluster 6:

In this cluster there are some parks, an opera house and a palace, we will call it "Culture".

In [80]:
stuttgart_merged.loc[stuttgart_merged['Cluster Labels'] == 5, stuttgart_merged.columns[[0] + list(range(5, stuttgart_merged.shape[1]))]]

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
14,Bad_Cannstatt_70374,Pizza Place,Metro Station,Taverna,African Restaurant,Park,Nightclub,Opera House,Organic Grocery,Palace,Paper / Office Supplies Store
26,Stuttgart-Stammheim-Zuffenhausen_70439,Metro Station,Hotel,Supermarket,Pizza Place,African Restaurant,Park,Nightclub,Opera House,Organic Grocery,Palace
27,Stuttgart-Mühlhausen-Zuffenhausen_70437,Metro Station,Train Station,Hotel,Bakery,Park,Pedestrian Plaza,Nightclub,Opera House,Organic Grocery,Palace
