# Venue distribution in Zurich Quartier

This is an example how to find all venues of a given category from foursquare data and determine in which neighbourhood/quartier they are located. The end gives an example how to the compare two venue categories and figure out which one is more prevalent in a given neighbourhood/quartier. 

### Import libraries

In [1]:
import pandas as pd
import numpy as np
import re
import requests # library to handle requests
from bs4 import BeautifulSoup
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

print('Investigating the neighbourhoods of Zurich!')

Investigating the neighbourhoods of Zurich!


### Fetch Zurich neighbourhood table from Wiki

In [2]:
## URL of table
zh_url='https://de.wikipedia.org/wiki/Stadtteile_der_Stadt_Zürich'

### get page content and parse
zh_page = requests.get(zh_url).text
zh_parsed = BeautifulSoup(zh_page,'xml')

### find all tables and table rows
zh_table = zh_parsed.findAll('table',{'class':'wikitable sortable'})[1]
zh_table_rows = zh_table.find_all('tr')

### Extract website information from scraped data

In [3]:
data = []
### read our rows from data
for row in zh_table_rows:
    data.append([t.text.strip() for t in row.find_all('td')])

### omit first and last row
data = data[1:-1]
addition = []

## Pre-processing
### Deal with nested table on wiki

In [4]:
### clean nested table rows
for quartier in data:

    ### check if borough is in row, remember for next row
    if quartier[0].find('Kreis') == 0:
        ### space after Kreis XY
        quartier[0] = re.sub(r"([0-9]+(\.[0-9]+)?)",r" \1 ", quartier[0]).strip()
        addition = quartier[0]
    elif quartier[0].find('Kreis') != 0:
        quartier.insert(0, addition) 
    
    ### remove entry of sigil
    if quartier[1] == '':
        del quartier[1]
        
    ### remove non-sensical strings
    for j, ele in enumerate(quartier):
        quartier[j] = re.sub(r'^.*?!', '', ele)

### Read data into dataframe and assign columns

In [5]:
### assign columns to table entries
zh_data = pd.DataFrame(data, columns=['Kreis', 'Quartier', 'BFS-Code', 'Included', 'Area in km²', 'Inhabitants (2018)', 'Inhabitants (2013)', 
 'Inhabitants (2005)', 'Immigrants'])
zh_data.head(34)

Unnamed: 0,Kreis,Quartier,BFS-Code,Included,Area in km²,Inhabitants (2018),Inhabitants (2013),Inhabitants (2005),Immigrants
0,Kreis 1 Altstadt,Rathaus,261011,vor 1893,0.38,3267,3194,3081,"30,1 %"
1,Kreis 1 Altstadt,Hochschulen,261012,vor 1893,0.56,664,665,695,"34,3 %"
2,Kreis 1 Altstadt,Lindenhof,261013,vor 1893,0.23,990,923,950,"30,1 %"
3,Kreis 1 Altstadt,City,261014,vor 1893,0.64,829,783,846,"30,0 %"
4,Kreis 2,Wollishofen,261021,1893,5.75,18'923,15'937,15'592,"29,1 %"
5,Kreis 2,Leimbach,261023,1893,2.92,6320,5730,4867,"33,6 %"
6,Kreis 2,Enge,261024,1893,2.4,9634,8836,8375,"36,7 %"
7,Kreis 3 Wiedikon,Alt-Wiedikon,261031,1893,1.85,17'956,16'706,14'971,"34,8 %"
8,Kreis 3 Wiedikon,Friesenberg,261033,1893,5.15,10'933,10'696,10'360,"18,3 %"
9,Kreis 3 Wiedikon,Sihlfeld,261034,1893,1.64,21'680,20'931,20'554,"31,2 %"


## Plotting map
### Load libraries for mapping

In [6]:
from geopy.geocoders import Nominatim

import matplotlib.cm as cm
import matplotlib.colors as colors

import geopandas as gpd
from shapely.geometry import shape
import geojson

import folium



### Find Lat/Lon data of Zurich

In [7]:
address = 'ZURICH'

geolocator = Nominatim(user_agent="zurich_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The lati & long coordinate of Zurich are {}, {}.'.format(latitude, longitude))

The lati & long coordinate of Zurich are 47.3744489, 8.5410422.


### Load map of Zurich

In [8]:
zh_map = folium.Map(location=[latitude, longitude], zoom_start=12)
# zh_map

### Load polygons of neighbourhoods 

In [9]:
### load polygon data for quartiere/neighbourhoods
zh_geo = gpd.read_file('./data/stzh.adm_statistische_quartiere_map.shp')  
### rename borough/kreis and neighbourhood/quartier
zh_geo.rename(columns = {'qname':'Quartier','kname':'Kreis'}, inplace = True) 
### convert to lat/lon
gjson = zh_geo.to_crs(epsg='4326').to_json()
### add polygons to map
folium.GeoJson(gjson).add_to(zh_map)
# zh_map

<folium.features.GeoJson at 0x7fa9902851c0>

### Obtain quartier/neighbourhood centroids from circumference polygons

In [10]:
### convert from swiss encoding to lat/lon
zh_poly = zh_geo.to_crs(epsg='4326')

### initialize polygon centroids
lat_centroid = []
lon_centroid = []

### loop through quartiers/neighbourhoods
for i in range(len(zh_poly)):
    ### convert to shape object
    p = shape(zh_poly['geometry'][i])
    ### extract polygon points
    lon_polygon, lat_polygon = p.exterior.coords.xy
    
    ### initialize centroid for each polygon
    lat = 0
    lon = 0
    ### add up all points
    for j in range(len(lon_polygon)):
        lat += lat_polygon[j]
        lon += lon_polygon[j]

    ### final coordinates
    lat_centroid.append(lat/len(lat_polygon))
    lon_centroid.append(lon/len(lon_polygon))

# quartier centroid lat/lon coordinates
zh_geo['Latitude'] = lat_centroid
zh_geo['Longitude'] = lon_centroid

zh_geo.head()

Unnamed: 0,objectid,objid,qnr,Quartier,knr,Kreis,geometry,Latitude,Longitude
0,1,34,73.0,Hirslanden,7.0,Kreis 7,"POLYGON ((2684457.034 1246514.804, 2684466.315...",47.36202,8.572849
1,2,33,83.0,Weinegg,8.0,Kreis 8,"POLYGON ((2684457.383 1246512.719, 2684458.291...",47.356528,8.567511
2,3,32,82.0,MÃ¼hlebach,8.0,Kreis 8,"POLYGON ((2684269.913 1246566.796, 2684271.618...",47.358239,8.556874
3,4,31,81.0,Seefeld,8.0,Kreis 8,"POLYGON ((2683794.254 1246609.895, 2683802.117...",47.357012,8.55488
4,7,16,41.0,Werd,4.0,Kreis 4,"POLYGON ((2682651.888 1247587.653, 2682650.697...",47.372574,8.526534


### Visualize all Zurich neighbourhood centroids

In [11]:
# add quartier markers to map
for lat, lon, neighbourhood, kreis in zip(zh_geo['Latitude'], zh_geo['Longitude'], zh_geo['Quartier'], zh_geo['Kreis']):
    label = '{} , {}'.format(kreis, neighbourhood)
    label = folium.Popup(label)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=False
    ).add_to(zh_map)  
    
zh_map

In [12]:
zh_data = zh_data.merge(zh_geo.drop(columns=['Kreis']), left_on='Quartier', right_on='Quartier')
zh_data.head()

Unnamed: 0,Kreis,Quartier,BFS-Code,Included,Area in km²,Inhabitants (2018),Inhabitants (2013),Inhabitants (2005),Immigrants,objectid,objid,qnr,knr,geometry,Latitude,Longitude
0,Kreis 1 Altstadt,Rathaus,261011,vor 1893,0.38,3267,3194,3081,"30,1 %",23,12,11.0,1.0,"POLYGON ((2683374.124 1246786.080, 2683374.192...",47.370203,8.543717
1,Kreis 1 Altstadt,Hochschulen,261012,vor 1893,0.56,664,665,695,"34,3 %",29,6,12.0,1.0,"POLYGON ((2683993.543 1247428.241, 2683993.817...",47.372247,8.545615
2,Kreis 1 Altstadt,Lindenhof,261013,vor 1893,0.23,990,923,950,"30,1 %",16,23,13.0,1.0,"POLYGON ((2683168.047 1246706.173, 2683187.786...",47.36993,8.541478
3,Kreis 1 Altstadt,City,261014,vor 1893,0.64,829,783,846,"30,0 %",22,17,14.0,1.0,"POLYGON ((2683325.312 1247912.255, 2683325.292...",47.37365,8.537898
4,Kreis 2,Wollishofen,261021,1893,5.75,18'923,15'937,15'592,"29,1 %",5,30,21.0,2.0,"POLYGON ((2683464.971 1243316.936, 2683455.914...",47.339905,8.528241


### Load Foursquare info

In [None]:
CLIENT_ID = 'foursquare-id-here' # your Foursquare ID
CLIENT_SECRET = 'foursquare-secret-here' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

### function that extracts the category of the venue


In [14]:
def get_category_type(row):
    try:
        categories_list = row['categories']
    except:
        categories_list = row['venue.categories']
        
    if len(categories_list) == 0:
        return None
    else:
        return categories_list[0]['name']

## 2. Explore Neighbourhoods/Quartiere in Zurich


In [15]:
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print('Scraping venues for quartier:', name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

## Look into specific category

In [16]:
### number of venues
LIMIT = 50
radius = 1000 # define radius

def getNearbyVenuesCategory(names, latitudes, longitudes, foursquare_category_id, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print('Scraping venues for quartier:', name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&categoryId={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT,
            foursquare_category_id)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']

        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])
        
    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighbourhood', 
                  'Neighbourhood Latitude', 
                  'Neighbourhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

### Extract specific categories

In [17]:
foursquare_id_yoga = '4bf58dd8d48988d102941735'
foursquare_id_gym = '4bf58dd8d48988d175941735'

### pull all yoga venues
zh_venues_yoga = getNearbyVenuesCategory(names=zh_data['Quartier'],
                                   latitudes=zh_data['Latitude'],
                                   longitudes=zh_data['Longitude'],
                                   foursquare_category_id=foursquare_id_yoga
                                  )

### pull all gyms venues
zh_venues_gym = getNearbyVenuesCategory(names=zh_data['Quartier'],
                                   latitudes=zh_data['Latitude'],
                                   longitudes=zh_data['Longitude'],
                                   foursquare_category_id=foursquare_id_gym
                                  )

Scraping venues for quartier: Rathaus
Scraping venues for quartier: Hochschulen
Scraping venues for quartier: Lindenhof
Scraping venues for quartier: City
Scraping venues for quartier: Wollishofen
Scraping venues for quartier: Leimbach
Scraping venues for quartier: Enge
Scraping venues for quartier: Alt-Wiedikon
Scraping venues for quartier: Friesenberg
Scraping venues for quartier: Sihlfeld
Scraping venues for quartier: Werd
Scraping venues for quartier: Langstrasse
Scraping venues for quartier: Hard
Scraping venues for quartier: Gewerbeschule
Scraping venues for quartier: Escher Wyss
Scraping venues for quartier: Unterstrass
Scraping venues for quartier: Oberstrass
Scraping venues for quartier: Fluntern
Scraping venues for quartier: Hottingen
Scraping venues for quartier: Hirslanden
Scraping venues for quartier: Witikon
Scraping venues for quartier: Seefeld
Scraping venues for quartier: Weinegg
Scraping venues for quartier: Albisrieden
Scraping venues for quartier: Altstetten
Scrapin

### Check if venue is actually in quartier/neighbourhood

In [18]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

### initialize array of venues not actually in quartier
not_in_quartier = []

### Go through all venues
for i in range(len(zh_venues_yoga['Venue'])):

    quartier_polygon = zh_poly[ zh_poly['Quartier'] == zh_venues_yoga.loc[i, 'Neighbourhood'] ].reset_index(drop=True)
    
    lat = zh_venues_yoga.loc[i, 'Venue Latitude']
    lon = zh_venues_yoga.loc[i, 'Venue Longitude']
    point = Point([lon, lat])
    
    in_quartier = quartier_polygon.contains(point)

    ### store row index if venue is not in quartier
    if in_quartier[0] == False: 
        not_in_quartier.append(i)
        
zh_venues_yoga_checked = zh_venues_yoga.drop(not_in_quartier)

zh_venues_yoga_checked.head()

Unnamed: 0,Neighbourhood,Neighbourhood Latitude,Neighbourhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Rathaus,47.370203,8.543717,AirYoga,47.368315,8.545928,Yoga Studio
1,Rathaus,47.370203,8.543717,athayoga,47.370736,8.545406,Yoga Studio
17,City,47.37365,8.537898,PowerQiBalance,47.373033,8.532857,Yoga Studio
21,Wollishofen,47.339905,8.528241,SAMiRAYOGA,47.344446,8.526638,Yoga Studio
22,Enge,47.358245,8.53004,Rock'n'Well,47.358996,8.523873,Yoga Studio


## Map of checked venues

In [19]:
# add yoga markers to map
for lat, lng, label in zip(zh_venues_yoga['Venue Latitude'], zh_venues_yoga['Venue Longitude'], zh_venues_yoga['Venue']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='red',
        fill=False,
        parse_html=False).add_to(zh_map)  
    
# add gym markers to map
for lat, lng, label in zip(zh_venues_gym['Venue Latitude'], zh_venues_gym['Venue Longitude'], zh_venues_gym['Venue']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=3,
        popup=label,
        color='green',
        fill=False,
        parse_html=False).add_to(zh_map) 

zh_map

### Count Yoga studios and gyms per neighbourhood

In [24]:
columns_to_drop = ['Neighbourhood Latitude', 'Neighbourhood Longitude', 'Venue Latitude', 'Venue Longitude', 'Venue Category']

zh_venues_total_yoga = zh_venues_yoga.groupby('Neighbourhood').count().drop(columns=columns_to_drop)
zh_venues_total_gym = zh_venues_gym.groupby('Neighbourhood').count().drop(columns=columns_to_drop)

zh_venues_total_comparison = zh_venues_total_yoga.merge(zh_venues_total_gym, left_on='Neighbourhood', right_on='Neighbourhood', suffixes=('_yoga', '_gym'))
zh_venues_total_comparison['Gym/Yoga'] = zh_venues_total_comparison['Venue_gym'].div(zh_venues_total_comparison['Venue_yoga'], axis=0).round(2)
zh_venues_total_comparison.head(34)

Unnamed: 0_level_0,Venue_yoga,Venue_gym,Gym/Yoga
Neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Affoltern,1,4,4.0
Alt-Wiedikon,3,7,2.33
City,6,25,4.17
Enge,5,7,1.4
Escher Wyss,5,13,2.6
Fluntern,2,2,1.0
Gewerbeschule,4,18,4.5
Hard,2,9,4.5
Hirslanden,1,2,2.0
Hirzenbach,1,1,1.0
