# Import Libraries

In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

import json # library to handle JSON files

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
!pip install geocoder # to get coordinates
import geocoder

import requests # library to handle requests
from urllib.request import urlopen
from bs4 import BeautifulSoup # library to parse HTML and XML documents

!conda install -c conda-forge folium=0.5.0 --yes
import folium # map rendering library

from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans



print("Libraries imported.")

Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |████████████████████████████████| 102kB 15.1MB/s ta 0:00:01
Collecting ratelim (from geocoder)
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592cad49c/ratelim-0.1.6-py2.py3-none-any.whl
Installing collected packages: ratelim, geocoder
Successfully installed geocoder-1.38.1 ratelim-0.1.6
Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
   

# 2. Scrap data from Wikipedia page into a DataFrame¶


In [2]:
url = "https://de.wikipedia.org/wiki/Liste_der_Stadtteile_M%C3%BCnchens"
page = urlopen(url).read().decode("utf-8")
soup = BeautifulSoup(page, "html.parser")

wiki_table = soup.find("table", {"class":"wikitable sortable zebra"}).tbody



In [3]:
##Extracting data from the table to use the data frame

def get_cell(elements):
    cells = elements.find_all("td")
    row = []
    for cell in cells:
        if cell.a:
            if (cell.a.text):
                row.append(cell.a.text)
                continue
        row.append(cells)
    return row

def get_row():
    data = []
    for tr in wiki_table.find_all("tr"):
        row = get_cell(tr)
        if len(row) !=4:
            continue
        data.append(row)
    return data

In [4]:
# Building table
data = get_row()
columns = ["Borough", "Stadtbezirk", "Nr", "Quartiere"]
df = pd.DataFrame(data, columns = columns)


In [5]:
muc_df = df.drop(["Stadtbezirk","Nr", "Quartiere"], axis = 1 )

In [6]:
muc_df.to_csv("muc_df - Borough")

# 3. Get the geographical coordinates¶


In [7]:
# define a function to get coordinates
def get_latlng(Borough):
    # initialize youre variable to None
    lat_lng_coords = None
    # loop until you get the coordinates
    while (lat_lng_coords is None):
        g = geocoder.arcgis("{}, Munich, Germany".format(Borough))
        lat_lng_coords = g.latlng
    return lat_lng_coords

In [8]:
coords = [get_latlng(Borough) for Borough in muc_df["Borough"].tolist()]

In [9]:
coords

[[48.189940000000036, 11.460290000000043],
 [48.13641000000007, 11.577540000000056],
 [48.196510000000046, 11.57200000000006],
 [48.13244000000003, 11.659800000000075],
 [48.18309151839817, 11.563077891046479],
 [48.126370000000065, 11.58331000000004],
 [48.157260000000065, 11.41578000000004],
 [48.13132000000007, 11.632090000000062],
 [48.13999000000007, 11.60361000000006],
 [48.14300000000003, 11.661270000000059],
 [48.149320000000046, 11.640510000000063],
 [48.155980000000056, 11.641680000000065],
 [48.09098000000006, 11.610580000000027],
 [48.21461000000005, 11.53490000000005],
 [48.08328000000006, 11.484480000000076],
 [48.13467000000003, 11.410200000000032],
 [48.19002000000006, 11.615420000000029],
 [48.094970000000046, 11.49866000000003],
 [48.109620000000064, 11.57534000000004],
 [48.109620000000064, 11.57534000000004],
 [48.115350000000035, 11.479380000000049],
 [48.11529812867541, 11.493999199097289],
 [48.11529812867541, 11.493999199097289],
 [48.12930000000006, 11.59921000

In [10]:
# create temporary dataframe to populate the coordinates into Latitude and Longitude
df_coords = pd.DataFrame(coords, columns=['Latitude', 'Longitude'])

In [11]:
# merge the coordinates into the original dataframe
muc_df['Latitude'] = df_coords['Latitude']
muc_df['Longitude'] = df_coords['Longitude']

In [12]:
# check the neighborhoods and the coordinates
print(muc_df.shape)
muc_df

(56, 3)


Unnamed: 0,Borough,Latitude,Longitude
0,Allach,48.18994,11.46029
1,Altstadt,48.13641,11.57754
2,Am Hart,48.19651,11.572
3,Am Moosfeld,48.13244,11.6598
4,Am Riesenfeld,48.183092,11.563078
5,Au,48.12637,11.58331
6,Aubing,48.15726,11.41578
7,Berg am Laim,48.13132,11.63209
8,Bogenhausen,48.13999,11.60361
9,Daglfing,48.143,11.66127


### Create a map of Munich with Boroughs superimposed on top¶


In [13]:
#get the coordinates of Munich
address = 'Munich, Germany'

geolocator = Nominatim(user_agent="foursquare_agent")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Munich, Germany {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Munich, Germany 48.1371079, 11.5753822.


In [14]:
# create map of Munich using latitude and longitude values
map_muc = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, Borough in zip(muc_df['Latitude'], muc_df['Longitude'], muc_df['Borough']):
    label = '{}'.format(Borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_muc)  
    
map_muc

### Use the Foursquare API to explore the Boroughs


In [15]:
# define Foursquare Credentials and Version
CLIENT_ID = 'M3ZVQGYUV3LWQXFFVAP2X2M1KC2AQZKKJGO30EKYS53NDMZT' # your Foursquare ID
CLIENT_SECRET = 'XS12M0XNGENAG4YUYQ1H3JR2OEA0J3TBQWFGXTS1IFM0S5RD' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: M3ZVQGYUV3LWQXFFVAP2X2M1KC2AQZKKJGO30EKYS53NDMZT
CLIENT_SECRET:XS12M0XNGENAG4YUYQ1H3JR2OEA0J3TBQWFGXTS1IFM0S5RD


### Now, get the 100 Top venues that are within the radius of 2000 meters


In [16]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, Borough in zip(muc_df['Latitude'], muc_df['Longitude'], muc_df['Borough']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            Borough,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [17]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Borough', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
gym_name = venues_df[venues_df.VenueCategory.isin(["Gym", "Gym / Fitness Center"])]

gym_name = gym_name.drop(["Latitude", "Longitude"], axis = 1)

gym_name

(4395, 7)


Unnamed: 0,Borough,VenueName,VenueLatitude,VenueLongitude,VenueCategory
127,Am Hart,FitX,48.204959,11.587364,Gym / Fitness Center
130,Am Hart,McFIT,48.193098,11.592544,Gym / Fitness Center
200,Am Moosfeld,body + soul Center Trudering,48.122437,11.662373,Gym / Fitness Center
263,Am Moosfeld,Bodystreet,48.126418,11.641486,Gym / Fitness Center
319,Am Riesenfeld,body + soul Center München Nord,48.17837,11.537157,Gym / Fitness Center
497,Aubing,clever fit München Aubing,48.155043,11.41451,Gym / Fitness Center
519,Berg am Laim,CrossFit eo,48.140814,11.627844,Gym
525,Berg am Laim,Fitness First Platinum Swim Club,48.136407,11.614137,Gym / Fitness Center
553,Berg am Laim,Kieser Training Haidhausen,48.135782,11.60844,Gym
644,Bogenhausen,Fitness First Platinum Swim Club,48.136407,11.614137,Gym / Fitness Center


In [18]:
gym_name.shape

(99, 5)

In [19]:
# create map of Munich using latitude and longitude values
map_gym = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, VenueName in zip(gym_name['VenueLatitude'], gym_name['VenueLongitude'], gym_name['VenueName']):
    label = '{}'.format(VenueName)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_gym)  
    
map_gym


### let's check how many venues were returned for each borough


In [20]:
venues_df.groupby(["Borough"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Borough,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Allach,21,21,21,21,21,21
Altstadt,100,100,100,100,100,100
Am Hart,77,77,77,77,77,77
Am Moosfeld,85,85,85,85,85,85
Am Riesenfeld,100,100,100,100,100,100
Au,100,100,100,100,100,100
Aubing,29,29,29,29,29,29
Berg am Laim,96,96,96,96,96,96
Bogenhausen,100,100,100,100,100,100
Daglfing,73,73,73,73,73,73


### let's find out how many unique categories can be curated from all the returned venues


In [21]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))



There are 264 uniques categories.


In [22]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:100]

array(['German Restaurant', 'Trattoria/Osteria', 'Bakery',
       'Sporting Goods Shop', 'Beer Garden', 'Drugstore', 'Garden Center',
       'Bavarian Restaurant', 'Italian Restaurant', 'Supermarket',
       'Hotel', 'Food & Drink Shop', 'Furniture / Home Store',
       'Breakfast Spot', 'Tunnel', 'Plaza', 'Farmers Market',
       'Falafel Restaurant', 'Fountain', 'Café', 'Church', 'Gourmet Shop',
       'Coffee Shop', 'Fish Market', 'Organic Grocery', 'Steakhouse',
       'Cocktail Bar', 'Department Store', 'Opera House', 'Bookstore',
       'Wine Bar', 'Boutique', 'Vegetarian / Vegan Restaurant',
       'Jazz Club', 'English Restaurant', 'Cupcake Shop', 'Irish Pub',
       'Ice Cream Shop', 'Candy Store', 'Art Museum', 'Manti Place',
       "Men's Store", 'Yoga Studio', 'Historic Site', 'Theater',
       'Board Shop', 'Clothing Store', 'Restaurant', 'Shopping Mall',
       'Bar', 'Palace', 'Garden', 'Sandwich Place', 'Pastry Shop',
       'Bistro', 'Tea Room', 'Seafood Restaurant', '

In [23]:
# check if the results contain "Fitness"
"Gym / Fitness Center" in venues_df['VenueCategory'].unique()

True

# 6. Analyze each Borough


In [24]:
# one hot encoding
muc_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add Borough column back to dataframe
muc_onehot['Borough'] = venues_df['Borough'] 

# move Borough column to the first column
fixed_columns = [muc_onehot.columns[-1]] + list(muc_onehot.columns[:-1])
muc_onehot = muc_onehot[fixed_columns]

print(muc_onehot.shape)
muc_onehot.head()

(4395, 265)


Unnamed: 0,Borough,ATM,Accessories Store,Afghan Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Austrian Restaurant,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Basketball Stadium,Bavarian Restaurant,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bistro,Board Shop,Boarding House,Boat Rental,Bookstore,Bosnian Restaurant,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Line,Bus Stop,Butcher,Cafeteria,Café,Campground,Canal,Candy Store,Castle,Caucasian Restaurant,Chinese Restaurant,Church,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Cafeteria,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Creperie,Cretan Restaurant,Cultural Center,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,English Restaurant,Ethiopian Restaurant,Event Space,Fair,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Food,Food & Drink Shop,Food Court,Forest,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Hardware Store,Health & Beauty Service,Heliport,Hill,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indoor Play Area,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewish Restaurant,Juice Bar,Kebab Restaurant,Kitchen Supply Store,Korean Restaurant,Lake,Laundromat,Lebanese Restaurant,Light Rail Station,Liquor Store,Lottery Retailer,Lounge,Malay Restaurant,Manti Place,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Modern Greek Restaurant,Monument / Landmark,Motel,Motorcycle Shop,Movie Theater,Museum,Music Store,Music Venue,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Outdoor Sculpture,Outlet Store,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Pet Store,Pharmacy,Pide Place,Pie Shop,Pizza Place,Planetarium,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Post Office,Pub,Public Art,Racetrack,Ramen Restaurant,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Schnitzel Restaurant,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Storage Facility,Supermarket,Surf Spot,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Taverna,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Tram Station,Trattoria/Osteria,Tunnel,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park,Waterfall,Wine Bar,Wine Shop,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Allach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Allach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
2,Allach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Allach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Allach,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Next, let's group rows by borough and by taking the mean of the frequency of occurrence of each category

In [25]:
muc_grouped = muc_onehot.groupby(["Borough"]).mean().reset_index()

print(muc_grouped.shape)
muc_grouped



(51, 265)


Unnamed: 0,Borough,ATM,Accessories Store,Afghan Restaurant,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,Austrian Restaurant,Auto Dealership,Auto Garage,Automotive Shop,BBQ Joint,Baby Store,Bagel Shop,Bakery,Bank,Bar,Basketball Stadium,Bavarian Restaurant,Beach,Beach Bar,Bed & Breakfast,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bistro,Board Shop,Boarding House,Boat Rental,Bookstore,Bosnian Restaurant,Botanical Garden,Boutique,Bowling Alley,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Line,Bus Stop,Butcher,Cafeteria,Café,Campground,Canal,Candy Store,Castle,Caucasian Restaurant,Chinese Restaurant,Church,Circus,Climbing Gym,Clothing Store,Club House,Cocktail Bar,Coffee Shop,College Cafeteria,Comedy Club,Comfort Food Restaurant,Comic Shop,Concert Hall,Convenience Store,Creperie,Cretan Restaurant,Cultural Center,Cupcake Shop,Dance Studio,Deli / Bodega,Department Store,Design Studio,Dessert Shop,Diner,Dog Run,Doner Restaurant,Donut Shop,Drugstore,Eastern European Restaurant,Electronics Store,English Restaurant,Ethiopian Restaurant,Event Space,Fair,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flower Shop,Food,Food & Drink Shop,Food Court,Forest,Fountain,French Restaurant,Fried Chicken Joint,Frozen Yogurt Shop,Fruit & Vegetable Store,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,General Entertainment,German Restaurant,Gift Shop,Golf Course,Gourmet Shop,Greek Restaurant,Grilled Meat Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Hardware Store,Health & Beauty Service,Heliport,Hill,Historic Site,History Museum,Hockey Arena,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Indoor Play Area,Intersection,Irish Pub,Italian Restaurant,Japanese Restaurant,Jazz Club,Jewish Restaurant,Juice Bar,Kebab Restaurant,Kitchen Supply Store,Korean Restaurant,Lake,Laundromat,Lebanese Restaurant,Light Rail Station,Liquor Store,Lottery Retailer,Lounge,Malay Restaurant,Manti Place,Market,Martial Arts Dojo,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Miscellaneous Shop,Mobile Phone Shop,Modern European Restaurant,Modern Greek Restaurant,Monument / Landmark,Motel,Motorcycle Shop,Movie Theater,Museum,Music Store,Music Venue,Nightclub,Noodle House,Office,Opera House,Organic Grocery,Outdoor Sculpture,Outlet Store,Palace,Paper / Office Supplies Store,Park,Pastry Shop,Performing Arts Venue,Pet Store,Pharmacy,Pide Place,Pie Shop,Pizza Place,Planetarium,Playground,Plaza,Poke Place,Pool,Portuguese Restaurant,Post Office,Pub,Public Art,Racetrack,Ramen Restaurant,Rental Car Location,Residential Building (Apartment / Condo),Restaurant,River,Rock Climbing Spot,Rock Club,Roof Deck,Salad Place,Salon / Barbershop,Sandwich Place,Scenic Lookout,Schnitzel Restaurant,Science Museum,Seafood Restaurant,Shipping Store,Shoe Store,Shop & Service,Shopping Mall,Skate Park,Skating Rink,Smoke Shop,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Spa,Spanish Restaurant,Sporting Goods Shop,Sports Bar,Sports Club,Stadium,Steakhouse,Storage Facility,Supermarket,Surf Spot,Sushi Restaurant,Szechuan Restaurant,Taco Place,Tapas Restaurant,Taverna,Tea Room,Tennis Court,Thai Restaurant,Theater,Theme Restaurant,Tibetan Restaurant,Toy / Game Store,Track,Trail,Train Station,Tram Station,Trattoria/Osteria,Tunnel,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park,Waterfall,Wine Bar,Wine Shop,Xinjiang Restaurant,Yoga Studio,Zoo,Zoo Exhibit
0,Allach,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.047619,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.095238,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.047619,0.047619,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Altstadt,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.11,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.07,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0
2,Am Hart,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051948,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.012987,0.012987,0.0,0.0,0.038961,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038961,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.077922,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.025974,0.0,0.0,0.012987,0.012987,0.0,0.025974,0.0,0.0,0.0,0.064935,0.0,0.0,0.0,0.025974,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051948,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.025974,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.012987,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.090909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025974,0.0,0.0,0.0,0.012987,0.0,0.0,0.0,0.0,0.012987,0.0,0.012987,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Am Moosfeld,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.070588,0.011765,0.0,0.0,0.023529,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.094118,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.023529,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.023529,0.0,0.023529,0.0,0.023529,0.0,0.0,0.0,0.023529,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.129412,0.0,0.011765,0.011765,0.0,0.0,0.0,0.0,0.0,0.035294,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.011765,0.011765,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.023529,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.117647,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.011765,0.0,0.047059,0.0,0.0,0.0,0.0,0.011765,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Am Riesenfeld,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.03,0.01,0.02,0.02,0.0,0.0,0.06,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.06,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.06,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Au,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.01,0.02,0.0,0.0,0.03,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.13,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.04,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.04,0.02,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0
6,Aubing,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.172414,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.068966,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.034483,0.0,0.034483,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.137931,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Berg am Laim,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.020833,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.052083,0.010417,0.0,0.0,0.020833,0.0,0.010417,0.0,0.0,0.010417,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.020833,0.0,0.0,0.020833,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.010417,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03125,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0625,0.0,0.0,0.010417,0.020833,0.0,0.010417,0.020833,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.145833,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.083333,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.010417,0.0,0.010417,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.104167,0.0,0.0,0.0,0.0,0.0,0.010417,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020833,0.010417,0.010417,0.0,0.0,0.010417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Bogenhausen,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.02,0.0,0.01,0.02,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.03,0.03,0.0,0.0,0.0,0.0,0.0,0.11,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0
9,Daglfing,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.013699,0.0,0.0,0.013699,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09589,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.013699,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.027397,0.0,0.027397,0.0,0.0,0.0,0.013699,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.191781,0.013699,0.013699,0.041096,0.0,0.0,0.0,0.0,0.0,0.041096,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.013699,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.027397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.068493,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.013699,0.0,0.0,0.0,0.0


In [26]:
len(muc_grouped[muc_grouped["Gym" and "Gym / Fitness Center"] > 0])



34

### create new DataFrame for Gym / Fitness Center data only

In [27]:
muc_gym = muc_grouped[["Borough", "Gym / Fitness Center" , "Gym"]]

In [28]:
muc_gym.head()

Unnamed: 0,Borough,Gym / Fitness Center,Gym
0,Allach,0.0,0.0
1,Altstadt,0.0,0.0
2,Am Hart,0.025974,0.0
3,Am Moosfeld,0.023529,0.0
4,Am Riesenfeld,0.01,0.0


# 7. Cluster Boroughs

### Run k-means to cluster the boroughs in Munich into 3 clusters

In [29]:
# Merge the categories "Gym" and "Fitness Center"
muc_gym["Fitness"]=muc_gym["Gym / Fitness Center"]+muc_gym["Gym"]
muc_gym

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Unnamed: 0,Borough,Gym / Fitness Center,Gym,Fitness
0,Allach,0.0,0.0,0.0
1,Altstadt,0.0,0.0,0.0
2,Am Hart,0.025974,0.0,0.025974
3,Am Moosfeld,0.023529,0.0,0.023529
4,Am Riesenfeld,0.01,0.0,0.01
5,Au,0.0,0.0,0.0
6,Aubing,0.034483,0.0,0.034483
7,Berg am Laim,0.010417,0.020833,0.03125
8,Bogenhausen,0.01,0.0,0.01
9,Daglfing,0.013699,0.0,0.013699


In [30]:
# set number of clusters
kclusters = 3

muc_clustering = muc_gym.drop(["Borough"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(muc_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 0, 1, 1, 0, 0, 1, 1, 0, 0], dtype=int32)

In [31]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
muc_merged = muc_gym.copy()

# add clustering labels
muc_merged["Cluster Labels"] = kmeans.labels_

In [32]:
muc_merged.rename(columns={"Borough": "Borough"}, inplace=True)
muc_merged = muc_merged.drop(["Gym / Fitness Center", "Gym"], axis =1)

muc_merged.head()

Unnamed: 0,Borough,Fitness,Cluster Labels
0,Allach,0.0,0
1,Altstadt,0.0,0
2,Am Hart,0.025974,1
3,Am Moosfeld,0.023529,1
4,Am Riesenfeld,0.01,0


In [33]:
# merge muc_grouped with muc_data to add latitude/longitude for each borough
muc_merged = muc_merged.join(muc_df.set_index("Borough"), on="Borough")

print(muc_merged.shape)
muc_merged.head() # check the last columns!

(56, 5)


Unnamed: 0,Borough,Fitness,Cluster Labels,Latitude,Longitude
0,Allach,0.0,0,48.18994,11.46029
1,Altstadt,0.0,0,48.13641,11.57754
2,Am Hart,0.025974,1,48.19651,11.572
3,Am Moosfeld,0.023529,1,48.13244,11.6598
4,Am Riesenfeld,0.01,0,48.183092,11.563078


In [34]:
# sort the results by Cluster Labels
print(muc_merged.shape)
muc_merged.sort_values(["Cluster Labels"], inplace=True)
muc_merged

(56, 5)


Unnamed: 0,Borough,Fitness,Cluster Labels,Latitude,Longitude
0,Allach,0.0,0,48.18994,11.46029
19,Hadern,0.0,0,48.11535,11.47938
20,Haidhausen,0.02,0,48.1293,11.59921
21,Harlaching,0.011765,0,48.09576,11.56292
22,Hasenbergl,0.0,0,48.2132,11.55689
41,Riem,0.0,0,48.14058,11.68107
24,Isarvorstadt,0.0,0,48.13258,11.57615
35,Nymphenburg,0.01,0,48.15877,11.50885
49,Untermenzing,0.016129,0,48.18025,11.48354
28,Lehel,0.0,0,48.13775,11.58711


### Finally, let's visualize the resulting clusters

In [35]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(muc_merged['Latitude'], muc_merged['Longitude'], muc_merged['Borough'], muc_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [36]:
# save the map as HTML file
map_clusters.save('map_clusters.html')

# 8. Examine Clusters

### Cluster 0

In [56]:
cluster_0 = muc_merged.loc[muc_merged['Cluster Labels'] == 0]
cluster_0.sort_values(["Borough"], inplace=True)
cluster_0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Unnamed: 0,Borough,Fitness,Cluster Labels,Latitude,Longitude
0,Allach,0.0,0,48.18994,11.46029
1,Altstadt,0.0,0,48.13641,11.57754
4,Am Riesenfeld,0.01,0,48.183092,11.563078
5,Au,0.0,0,48.12637,11.58331
8,Bogenhausen,0.01,0,48.13999,11.60361
9,Daglfing,0.013699,0,48.143,11.66127
12,Fasangarten,0.023256,0,48.09098,11.61058
13,Feldmoching,0.0,0,48.21461,11.5349
15,Freiham,0.0,0,48.13467,11.4102
18,Giesing,0.02,0,48.10962,11.57534


### Cluster 1

In [57]:
cluster_1=muc_merged.loc[muc_merged['Cluster Labels'] == 1]
cluster_1.sort_values(["Borough"], inplace=True)
cluster_1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


Unnamed: 0,Borough,Fitness,Cluster Labels,Latitude,Longitude
2,Am Hart,0.025974,1,48.19651,11.572
3,Am Moosfeld,0.023529,1,48.13244,11.6598
6,Aubing,0.034483,1,48.15726,11.41578
7,Berg am Laim,0.03125,1,48.13132,11.63209
10,Denning,0.04,1,48.14932,11.64051
11,Englschalking,0.042105,1,48.15598,11.64168
14,Forstenried,0.044118,1,48.08328,11.48448
16,Freimann,0.051282,1,48.19002,11.61542
23,Holzapfelkreuth,0.025,1,48.115298,11.493999
23,Holzapfelkreuth,0.025,1,48.115298,11.493999


### Cluster 2

In [39]:
muc_merged.loc[muc_merged['Cluster Labels'] == 2]

Unnamed: 0,Borough,Fitness,Cluster Labels,Latitude,Longitude
17,Fürstenried,0.066667,2,48.09497,11.49866
45,Solln,0.080645,2,48.07969,11.52504
