In [1]:
from geopy.geocoders import Nominatim

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

import folium # map rendering library

In [2]:
miete = pd.read_html('https://www.virtualvienna.net/moving-to-vienna/accommodation/real-estate-rental-prices/',decimal=',', na_values="k.A.", thousands=".") 
Miete = miete[0] 
Miete.head()

Unnamed: 0,District,<50 m²,51-80 m²,51-129 m²,>130 m²,average/m²
0,"1., Innere Stadt",23.14,20.27,18.57,19.65,19.44
1,"2., Leopoldstadt",16.63,14.9,14.03,13.91,14.57
2,"3., Landstraße",17.75,13.87,14.72,14.2,14.51
3,"4., Wieden",16.01,13.62,14.11,15.58,14.74
4,"5., Margareten",15.59,14.6,14.53,16.35,14.84


In [3]:
Miete['District'] = Miete['District'].str.replace('[0-9]*[.][,][ ]', '', regex=True)
Miete.head()

Unnamed: 0,District,<50 m²,51-80 m²,51-129 m²,>130 m²,average/m²
0,Innere Stadt,23.14,20.27,18.57,19.65,19.44
1,Leopoldstadt,16.63,14.9,14.03,13.91,14.57
2,Landstraße,17.75,13.87,14.72,14.2,14.51
3,Wieden,16.01,13.62,14.11,15.58,14.74
4,Margareten,15.59,14.6,14.53,16.35,14.84


In [4]:
area = pd.read_html('https://www.wien.gv.at/statistik/lebensraum/tabellen/nutzungsklassen-bez.html',decimal=',', na_values="k.A.", thousands=".") 
District_area = area[0] 
District_area.head(2)

Unnamed: 0_level_0,Bezirk,Flächen basierend auf rechtlichen Bezirksgrenzen in ha,Bauflächen in ha,Bauflächen in ha,Bauflächen in ha,Grünflächen in ha,Gewässer in ha,Verkehrs- flächen in ha
Unnamed: 0_level_1,Bezirk,Flächen basierend auf rechtlichen Bezirksgrenzen in ha,Gesamtfläche,Wohnbau- gebiete,"Kultur-, Sport-, rel. u. öffentl. Einr.",Grünflächen in ha,Gewässer in ha,Verkehrs- flächen in ha
0,Wien,41487.1,14911.1,10529.5,1886.8,18668.7,1915.7,5991.5
1,1. Innere Stadt,286.9,141.7,65.8,29.7,27.3,3.1,114.9


In [5]:
District_area.columns = ['District', 'district_size', 'total area', 'Residential_areas', 'Public_area','Green_area', 'Water_area', 'Traffic_area']

In [6]:
District_area.drop([0], inplace = True)
District_area.head()

Unnamed: 0,District,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area
1,1. Innere Stadt,286.9,141.7,65.8,29.7,27.3,3.1,114.9
2,2. Leopoldstadt,1924.2,437.8,278.7,125.1,674.5,410,402.0
3,3. Landstraße,739.8,412.1,215.9,81.6,110.7,0.5,216.5
4,4. Wieden,177.5,114.4,94.9,12.8,17.7,-,45.4
5,5. Margareten,201.2,129.4,113.6,4.4,8.8,-,63.0


In [7]:
District_area['District'] = District_area['District'].str.replace('[0-9]*[.][ ]', '', regex=True)
District_area.head()

Unnamed: 0,District,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area
1,Innere Stadt,286.9,141.7,65.8,29.7,27.3,3.1,114.9
2,Leopoldstadt,1924.2,437.8,278.7,125.1,674.5,410,402.0
3,Landstraße,739.8,412.1,215.9,81.6,110.7,0.5,216.5
4,Wieden,177.5,114.4,94.9,12.8,17.7,-,45.4
5,Margareten,201.2,129.4,113.6,4.4,8.8,-,63.0


In [8]:
Miete = Miete.merge(District_area, on = 'District')
Miete

Unnamed: 0,District,<50 m²,51-80 m²,51-129 m²,>130 m²,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area
0,Innere Stadt,23.14,20.27,18.57,19.65,19.44,286.9,141.7,65.8,29.7,27.3,3.1,114.9
1,Leopoldstadt,16.63,14.9,14.03,13.91,14.57,1924.2,437.8,278.7,125.1,674.5,410,402.0
2,Landstraße,17.75,13.87,14.72,14.2,14.51,739.8,412.1,215.9,81.6,110.7,0.5,216.5
3,Wieden,16.01,13.62,14.11,15.58,14.74,177.5,114.4,94.9,12.8,17.7,-,45.4
4,Margareten,15.59,14.6,14.53,16.35,14.84,201.2,129.4,113.6,4.4,8.8,-,63.0
5,Mariahilf,15.6,14.11,14.28,14.03,14.27,145.5,96.1,76.5,7.7,3.0,2.7,43.7
6,Neubau,17.63,14.34,14.1,14.87,14.68,160.8,116.8,92.4,14.3,3.7,-,40.4
7,Josefstadt,16.29,14.97,14.31,13.78,14.6,109.0,76.3,66.4,4.6,2.0,-,30.7
8,Alsergrund,16.73,15.28,14.33,14.51,14.59,296.7,179.7,104.2,66.1,22.2,-,94.9
9,Favoriten,15.0,12.96,12.27,10.05,13.23,3182.8,1135.0,750.1,205.1,1416.0,42.8,589.0


In [9]:
URL = 'https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:BEZIRKSGRENZEOGD&srsName=EPSG:4326&outputFormat=json'
ViennaData = json.loads(requests.get(URL).text) 

In [10]:
district_data = ViennaData['features']

In [11]:
# define the dataframe columns
column_names = ['District_Name', 'PostCode', 'Longitude','Latitude'] 

# instantiate the dataframe
district = pd.DataFrame(columns=column_names)
district

Unnamed: 0,District_Name,PostCode,Longitude,Latitude


In [12]:
district = pd.DataFrame({
'District':[i['properties']['NAMEK'] for i in district_data],
'PostCode' : [i['properties']['DISTRICT_CODE'] for i in district_data],
'Longitude' : [i['geometry']['coordinates'][0][100][0] for i in district_data],
'Latitude' : [i['geometry']['coordinates'][0][100][1] for i in district_data],
'number of Corrdinates': [len(i['geometry']['coordinates'][0]) for i in district_data]
})
district.head()

Unnamed: 0,District,PostCode,Longitude,Latitude,number of Corrdinates
0,Neubau,1070,16.336911,48.205887,509
1,Landstraße,1030,16.396008,48.2091,3266
2,Josefstadt,1080,16.339087,48.211701,458
3,Innere Stadt,1010,16.364741,48.215799,1409
4,Ottakring,1160,16.257403,48.224122,2247


In [13]:
Vienna = Miete.merge(district, on="District")
#Vienna.drop(['District'], axis = 1, inplace = True)
Vienna.head()

Unnamed: 0,District,<50 m²,51-80 m²,51-129 m²,>130 m²,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates
0,Innere Stadt,23.14,20.27,18.57,19.65,19.44,286.9,141.7,65.8,29.7,27.3,3.1,114.9,1010,16.364741,48.215799,1409
1,Leopoldstadt,16.63,14.9,14.03,13.91,14.57,1924.2,437.8,278.7,125.1,674.5,410,402.0,1020,16.383204,48.225601,3800
2,Landstraße,17.75,13.87,14.72,14.2,14.51,739.8,412.1,215.9,81.6,110.7,0.5,216.5,1030,16.396008,48.2091,3266
3,Wieden,16.01,13.62,14.11,15.58,14.74,177.5,114.4,94.9,12.8,17.7,-,45.4,1040,16.373235,48.185059,407
4,Margareten,15.59,14.6,14.53,16.35,14.84,201.2,129.4,113.6,4.4,8.8,-,63.0,1050,16.355895,48.196183,957


In [14]:
address = 'Vienna'

geolocator = Nominatim(user_agent="Vienna_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Vienna City are {}, {}.'.format(longitude, latitude))

The geograpical coordinate of Vienna City are 16.3725042, 48.2083537.


In [15]:
# create map of Vienna using latitude and longitude values
map_Vienna = folium.Map(location=[latitude, longitude], zoom_start=12)

In [16]:
# add markers to map
for lat, lng, District_Name, PostCode in zip(district['Latitude'], district['Longitude'], district['District'], district['PostCode']):
    label = '{}, {}'.format(District_Name, PostCode)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_Vienna)  
    
map_Vienna

In [17]:
CLIENT_ID = 'ASTVFHRB5LE1HCTUHZTDRDRKAKMKLU0R3I03AGJSPWTTPX5M' # your Foursquare ID
CLIENT_SECRET = '50JYEAMD1A01JY5CCASMOV0EGBPDXRR0FERAHRNOPOJM3JJO' #your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: ASTVFHRB5LE1HCTUHZTDRDRKAKMKLU0R3I03AGJSPWTTPX5M
CLIENT_SECRET:50JYEAMD1A01JY5CCASMOV0EGBPDXRR0FERAHRNOPOJM3JJO


In [18]:
innerstadt_latitude = Vienna.loc[0, 'Latitude'] # neighborhood latitude value
innerstadt_longitude = Vienna.loc[0, 'Longitude'] # neighborhood longitude value
District_name = Vienna.loc[0, 'District'] # neighborhood name

print(District_name, innerstadt_latitude, innerstadt_longitude, )


Innere Stadt 48.2157989674 16.3647409839


In [19]:
LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius
url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
    CLIENT_ID, 
    CLIENT_SECRET, 
    VERSION, 
    innerstadt_latitude, 
    innerstadt_longitude, 
    radius, 
    LIMIT)
url # display URL

'https://api.foursquare.com/v2/venues/explore?&client_id=ASTVFHRB5LE1HCTUHZTDRDRKAKMKLU0R3I03AGJSPWTTPX5M&client_secret=50JYEAMD1A01JY5CCASMOV0EGBPDXRR0FERAHRNOPOJM3JJO&v=20180605&ll=48.215798967393766,16.364740983944866&radius=500&limit=100'

In [20]:
results = requests.get(url).json()
results

{'meta': {'code': 200, 'requestId': '5ce9780b9fb6b7757e63d30a'},
 'response': {'groups': [{'items': [{'reasons': {'count': 0,
       'items': [{'reasonName': 'globalInteractionReason',
         'summary': 'This spot is popular',
         'type': 'general'}]},
      'referralId': 'e-0-533b0604498e2f5144a00f1b-0',
      'venue': {'categories': [{'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/cafe_',
          'suffix': '.png'},
         'id': '4bf58dd8d48988d16d941735',
         'name': 'Café',
         'pluralName': 'Cafés',
         'primary': True,
         'shortName': 'Café'}],
       'id': '533b0604498e2f5144a00f1b',
       'location': {'address': 'Kolingasse 5',
        'cc': 'AT',
        'city': 'Wien',
        'country': 'Österreich',
        'distance': 178,
        'formattedAddress': ['Kolingasse 5', '1090 Wien', 'Österreich'],
        'labeledLatLngs': [{'label': 'display',
          'lat': 48.21577399043162,
          'lng': 16.362334081855774}],
        '

In [21]:
# function that extracts the category of the venue
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

In [22]:
venues = results['response']['groups'][0]['items']
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['venue.name', 'venue.categories', 'venue.location.lat', 'venue.location.lng']
nearby_venues =nearby_venues.loc[:, filtered_columns]

# filter the category for each row
nearby_venues['venue.categories'] = nearby_venues.apply(get_category_type, axis=1)

# clean columns
nearby_venues.columns = [col.split(".")[-1] for col in nearby_venues.columns]

nearby_venues.head()

Unnamed: 0,name,categories,lat,lng
0,Das Kolin,Café,48.215774,16.362334
1,Hansen Restaurant Börse,Austrian Restaurant,48.214652,16.366133
2,Jonas Reindl Coffee,Café,48.214727,16.361707
3,Hilton Executive Lounge,Hotel Bar,48.21535,16.364871
4,Pizzeria Riva,Pizza Place,48.218105,16.365109


In [23]:
nearby_venues['categories'].value_counts()

Café                             5
Hotel                            5
Italian Restaurant               3
Soup Place                       3
Austrian Restaurant              3
Coffee Shop                      3
Wine Bar                         2
Art Gallery                      2
Cocktail Bar                     2
Gastropub                        2
Plaza                            2
Restaurant                       2
Irish Pub                        2
Salad Place                      1
College Quad                     1
Historic Site                    1
Pizza Place                      1
Steakhouse                       1
Gym                              1
Farmers Market                   1
Bookstore                        1
Park                             1
Shopping Mall                    1
Falafel Restaurant               1
Fast Food Restaurant             1
Candy Store                      1
Bakery                           1
Breakfast Spot                   1
Yoga Studio         

In [24]:
# create a function to repeat the same process to all the district
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [25]:
Vienna_venues = getNearbyVenues(names=Vienna['District'],
                                   latitudes=Vienna['Latitude'],
                                   longitudes=Vienna['Longitude']
                                  )



Innere Stadt
Leopoldstadt
Landstraße
Wieden
Margareten
Mariahilf
Neubau
Josefstadt
Alsergrund
Favoriten
Simmering
Meidling
Hietzing
Penzing
Ottakring
Hernals
Währing
Döbling
Brigittenau
Floridsdorf
Donaustadt
Liesing


In [26]:
# one hot encoding
Vienna_onehot = pd.get_dummies(Vienna_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
Vienna_onehot['District'] =Vienna['District'] 

# move neighborhood column to the first column
fixed_columns = [Vienna_onehot.columns[-1]] + list(Vienna_onehot.columns[:-1])
Vienna_onehot = Vienna_onehot[fixed_columns]
Vienna_onehot.head()

Unnamed: 0,District,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Austrian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Beer Garden,Beer Store,Bistro,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Café,Candy Store,Casino,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Quad,Concert Hall,Cosmetics Shop,Creperie,Cupcake Shop,Czech Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Food,Food & Drink Shop,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gay Bar,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Historic Site,Hobby Shop,Hookah Bar,Hostel,Hotel,Hotel Bar,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Karaoke Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Monument / Landmark,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Nightclub,Noodle House,Optical Shop,Organic Grocery,Pakistani Restaurant,Palace,Park,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Pub,Ramen Restaurant,Record Shop,Restaurant,Road,Rock Club,Salad Place,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Soup Place,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Thai Restaurant,Theater,Tibetan Restaurant,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Innere Stadt,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Leopoldstadt,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Landstraße,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Wieden,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Margareten,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
Vienna_onehot.shape

(575, 152)

In [28]:
Vienna_grouped = Vienna_onehot.groupby('District').mean().reset_index()
Vienna_grouped.head()

Unnamed: 0,District,American Restaurant,Aquarium,Arcade,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Austrian Restaurant,BBQ Joint,Bagel Shop,Bakery,Bar,Beer Garden,Beer Store,Bistro,Bookstore,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Stop,Café,Candy Store,Casino,Cheese Shop,Chinese Restaurant,Chocolate Shop,Church,Clothing Store,Cocktail Bar,Coffee Shop,College Quad,Concert Hall,Cosmetics Shop,Creperie,Cupcake Shop,Czech Restaurant,Dessert Shop,Dim Sum Restaurant,Diner,Dive Bar,Doner Restaurant,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Event Space,Exhibit,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Fish Market,Flea Market,Food,Food & Drink Shop,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gastropub,Gay Bar,German Restaurant,Gift Shop,Gluten-free Restaurant,Gourmet Shop,Greek Restaurant,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Historic Site,Hobby Shop,Hookah Bar,Hostel,Hotel,Hotel Bar,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Irish Pub,Italian Restaurant,Japanese Restaurant,Juice Bar,Karaoke Bar,Korean Restaurant,Latin American Restaurant,Liquor Store,Locksmith,Lounge,Market,Martial Arts Dojo,Massage Studio,Mediterranean Restaurant,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Monument / Landmark,Movie Theater,Multiplex,Museum,Music Store,Music Venue,Nightclub,Noodle House,Optical Shop,Organic Grocery,Pakistani Restaurant,Palace,Park,Performing Arts Venue,Persian Restaurant,Peruvian Restaurant,Pharmacy,Pizza Place,Playground,Plaza,Pool,Pool Hall,Pub,Ramen Restaurant,Record Shop,Restaurant,Road,Rock Club,Salad Place,Sandwich Place,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Soup Place,Spanish Restaurant,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Tattoo Parlor,Tea Room,Thai Restaurant,Theater,Tibetan Restaurant,Train Station,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Alsergrund,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Brigittenau,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Donaustadt,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,Döbling,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Favoriten,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [29]:
Vienna_grouped.shape

(22, 152)

In [30]:
num_top_venues = 10

for hood in Vienna_grouped['District']:
    print("----"+hood+"----")
    temp = Vienna_grouped[Vienna_grouped['District'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

----Alsergrund----
                 venue  freq
0          Coffee Shop   1.0
1  American Restaurant   0.0
2         Optical Shop   0.0
3        Movie Theater   0.0
4            Multiplex   0.0
5               Museum   0.0
6          Music Store   0.0
7          Music Venue   0.0
8            Nightclub   0.0
9         Noodle House   0.0


----Brigittenau----
                 venue  freq
0                 Park   1.0
1  American Restaurant   0.0
2         Noodle House   0.0
3        Movie Theater   0.0
4            Multiplex   0.0
5               Museum   0.0
6          Music Store   0.0
7          Music Venue   0.0
8            Nightclub   0.0
9         Optical Shop   0.0


----Donaustadt----
                       venue  freq
0                Yoga Studio   1.0
1  Middle Eastern Restaurant   0.0
2              Movie Theater   0.0
3                  Multiplex   0.0
4                     Museum   0.0
5                Music Store   0.0
6                Music Venue   0.0
7                  N

In [76]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [77]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
district_venues_sorted = pd.DataFrame(columns=columns)
district_venues_sorted['District'] = Vienna_grouped['District']

for ind in np.arange(Vienna_grouped.shape[0]):
    district_venues_sorted.iloc[ind, 1:] = return_most_common_venues(Vienna_grouped.iloc[ind, :], num_top_venues)

district_venues_sorted

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alsergrund,Coffee Shop,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant
1,Brigittenau,Park,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant
2,Donaustadt,Yoga Studio,Fried Chicken Joint,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Exhibit
3,Döbling,Cocktail Bar,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant
4,Favoriten,Peruvian Restaurant,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space
5,Floridsdorf,Restaurant,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Yoga Studio
6,Hernals,Juice Bar,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant
7,Hietzing,Indian Restaurant,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant
8,Innere Stadt,Café,Yoga Studio,Falafel Restaurant,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Exhibit
9,Josefstadt,Pakistani Restaurant,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant


In [78]:
# set number of clusters
kclusters = 8

Vienna_grouped_clustering = Vienna_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(Vienna_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([5, 0, 0, 0, 0, 7, 0, 0, 4, 0], dtype=int32)

In [79]:
# add clustering labels
district_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

In [80]:

Vienna_merged = Vienna

In [81]:
Vienna_merged = Vienna_merged.join(district_venues_sorted.set_index('District'), on='District')

In [82]:
Vienna_merged.head()

Unnamed: 0,District,<50 m²,51-80 m²,51-129 m²,>130 m²,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Innere Stadt,23.14,20.27,18.57,19.65,19.44,286.9,141.7,65.8,29.7,27.3,3.1,114.9,1010,16.364741,48.215799,1409,4,Café,Yoga Studio,Falafel Restaurant,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Exhibit
1,Leopoldstadt,16.63,14.9,14.03,13.91,14.57,1924.2,437.8,278.7,125.1,674.5,410,402.0,1020,16.383204,48.225601,3800,3,Austrian Restaurant,Yoga Studio,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant
2,Landstraße,17.75,13.87,14.72,14.2,14.51,739.8,412.1,215.9,81.6,110.7,0.5,216.5,1030,16.396008,48.2091,3266,4,Café,Yoga Studio,Falafel Restaurant,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Exhibit
3,Wieden,16.01,13.62,14.11,15.58,14.74,177.5,114.4,94.9,12.8,17.7,-,45.4,1040,16.373235,48.185059,407,2,Hotel Bar,Fried Chicken Joint,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Exhibit
4,Margareten,15.59,14.6,14.53,16.35,14.84,201.2,129.4,113.6,4.4,8.8,-,63.0,1050,16.355895,48.196183,957,0,Pizza Place,Exhibit,Food & Drink Shop,Food,Flea Market,Fish Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Event Space


In [83]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(Vienna_merged['Latitude'], Vienna_merged['Longitude'], Vienna_merged['District'], Vienna_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [84]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 0, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
4,Margareten,14.84,201.2,129.4,113.6,4.4,8.8,-,63.0,1050,16.355895,48.196183,957,0,Pizza Place,Exhibit,Food & Drink Shop,Food
7,Josefstadt,14.6,109.0,76.3,66.4,4.6,2.0,-,30.7,1080,16.339087,48.211701,458,0,Pakistani Restaurant,Yoga Studio,Exhibit,Food & Drink Shop
9,Favoriten,13.23,3182.8,1135.0,750.1,205.1,1416.0,42.8,589.0,1100,16.357556,48.180181,1575,0,Peruvian Restaurant,Exhibit,Food & Drink Shop,Food
12,Hietzing,14.14,3771.5,882.3,726.5,133.6,2651.6,15.2,222.4,1130,16.194809,48.165307,1038,0,Indian Restaurant,Yoga Studio,Exhibit,Food & Drink Shop
15,Hernals,12.51,1139.1,407.1,382.0,13.6,602.7,3.3,125.9,1170,16.270447,48.252356,2238,0,Juice Bar,Yoga Studio,Exhibit,Food & Drink Shop
16,Währing,15.12,634.7,356.2,337.7,17.0,171.1,-,107.4,1180,16.289677,48.245698,2235,0,Pub,Yoga Studio,Exhibit,Food & Drink Shop
17,Döbling,15.71,2494.4,902.1,768.5,56.8,1192.0,110.6,289.8,1190,16.34055,48.283464,1761,0,Cocktail Bar,Yoga Studio,Exhibit,Food & Drink Shop
18,Brigittenau,13.21,571.0,204.8,155.1,15.6,50.9,118.5,197.0,1200,16.368956,48.259314,1127,0,Park,Yoga Studio,Exhibit,Food & Drink Shop
20,Donaustadt,15.54,10229.9,2742.0,1847.9,327.7,5607.7,920.3,959.9,1220,16.487593,48.292985,2561,0,Yoga Studio,Fried Chicken Joint,Food & Drink Shop,Food
21,Liesing,13.27,3206.2,1728.8,1030.9,122.5,960.1,42.9,474.4,1230,16.221353,48.152719,2427,0,Gym,Exhibit,Food & Drink Shop,Food


In [85]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 1, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
5,Mariahilf,14.27,145.5,96.1,76.5,7.7,3.0,2.7,43.7,1060,16.359803,48.201312,1293,1,Hotel,Yoga Studio,Exhibit,Food & Drink Shop
6,Neubau,14.68,160.8,116.8,92.4,14.3,3.7,-,40.4,1070,16.336911,48.205887,509,1,Hotel,Yoga Studio,Exhibit,Food & Drink Shop
10,Simmering,11.95,2325.6,848.3,440.0,157.7,925.9,46.3,505.1,1110,16.417633,48.185798,1515,1,Hotel,Yoga Studio,Exhibit,Food & Drink Shop


In [86]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 2, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
3,Wieden,14.74,177.5,114.4,94.9,12.8,17.7,-,45.4,1040,16.373235,48.185059,407,2,Hotel Bar,Fried Chicken Joint,Food & Drink Shop,Food


In [87]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 3, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
1,Leopoldstadt,14.57,1924.2,437.8,278.7,125.1,674.5,410.0,402.0,1020,16.383204,48.225601,3800,3,Austrian Restaurant,Yoga Studio,Exhibit,Food & Drink Shop
13,Penzing,12.68,3376.3,997.3,812.0,118.7,2022.4,45.2,311.4,1140,16.205647,48.256421,3192,3,Austrian Restaurant,Yoga Studio,Exhibit,Food & Drink Shop


In [88]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 4, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
0,Innere Stadt,19.44,286.9,141.7,65.8,29.7,27.3,3.1,114.9,1010,16.364741,48.215799,1409,4,Café,Yoga Studio,Falafel Restaurant,Food & Drink Shop
2,Landstraße,14.51,739.8,412.1,215.9,81.6,110.7,0.5,216.5,1030,16.396008,48.2091,3266,4,Café,Yoga Studio,Falafel Restaurant,Food & Drink Shop
11,Meidling,13.65,810.3,485.8,367.1,51.8,101.2,-,223.4,1120,16.335084,48.185147,1461,4,Café,Yoga Studio,Falafel Restaurant,Food & Drink Shop


In [89]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 5, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
8,Alsergrund,14.59,296.7,179.7,104.2,66.1,22.2,-,94.9,1090,16.361147,48.233325,2308,5,Coffee Shop,Yoga Studio,Exhibit,Food & Drink Shop


In [90]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 6, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
14,Ottakring,12.63,867.3,446.6,381.3,46.1,261.2,-,159.5,1160,16.257403,48.224122,2247,6,Playground,Yoga Studio,Exhibit,Food & Drink Shop


In [91]:
Vienna_merged.loc[Vienna_merged['Cluster Labels'] == 7, Vienna_merged.columns[[0] + list(range(5, Vienna_merged.shape[0]))]]

Unnamed: 0,District,average/m²,district_size,total area,Residential_areas,Public_area,Green_area,Water_area,Traffic_area,PostCode,Longitude,Latitude,number of Corrdinates,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue
19,Floridsdorf,13.15,4444.3,1852.2,1230.7,258.5,1802.4,150.7,639.0,1210,16.395301,48.320571,2103,7,Restaurant,Exhibit,Food & Drink Shop,Food
