# Zurich Neighborhood mining

This notebook combines data sources (wikipedia and Foursquare API) to assemble a list of neighborhoods and map them to their coordinates for futher processing.

In [7]:
import pandas as pd

from foursquare import fetch_venues, venue_frequency, rank_venues_by_frequency
from geocoder import enrich_neighborhoods_with_geocoder, map_neighborhoods

### Scrape wikipedia to compile Frankfurt neighborhood list

In [2]:
tables = pd.read_html('https://en.wikipedia.org/wiki/Subdivisions_of_Z%C3%BCrich')
tables[0].head()

Unnamed: 0,0,1,2
0,Zürich Districts,Zürich Neighborhoods,Zürich Postal Areas


In [3]:
results = pd.concat([tables[i][0] for i in list(range(2, 74, 6))])
print(len(results))
print(results[0:5])

34
0        Rathaus
1    Hochschulen
2      Lindenhof
3           City
0    Wollishofen
Name: 0, dtype: object


#### Drop duplicates

In [4]:
results_set = set(results)
neighborhoods_list = list(results_set)
print(len(neighborhoods_list))

34


In [5]:
df = pd.DataFrame(neighborhoods_list)
df.columns = ['Neighborhood']
df.head()

Unnamed: 0,Neighborhood
0,Sihlfeld
1,Rathaus
2,Oberstrass
3,Friesenberg
4,Saatlen


### Combine wikipedia data with geocoder data

Since Wikipedia dataset does not include zip code coordinates, we should hydrate dataset with longitude and latitude from Geocoder in order to access Foursquare data.

In [6]:
address = 'Zurich, Switzerland'

In [7]:
enrich_neighborhoods_with_geocoder(df, address)
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Sihlfeld,47.373218,8.51082
1,Rathaus,47.372649,8.544311
2,Oberstrass,47.385706,8.549124
3,Friesenberg,47.358174,8.493922
4,Saatlen,47.412637,8.565912


In [8]:
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Sihlfeld,47.373218,8.51082
1,Rathaus,47.372649,8.544311
2,Oberstrass,47.385706,8.549124
3,Friesenberg,47.358174,8.493922
4,Saatlen,47.412637,8.565912
5,City,47.372943,8.535346
6,Mühlebach,47.255395,8.695773
7,Fluntern,47.376777,8.558775
8,Hottingen,47.36968,8.555082
9,Seefeld,47.357783,8.551074


### Visualize Frankfurt neighborhoods
<a id="vis-neighborhoods"></a>

In [10]:
m = map_neighborhoods(df, address)
m

### Eliminate geospatial outliers

In [12]:
# Eliminate geospatial outliers
df.sort_values('Longitude')

Unnamed: 0,Neighborhood,Latitude,Longitude
31,Affoltern,47.278247,8.452152
25,Albisrieden,47.374857,8.484657
13,Altstetten,47.387403,8.486061
3,Friesenberg,47.358174,8.493922
23,Höngg,47.40166,8.497715
0,Sihlfeld,47.373218,8.51082
21,Hard,47.381743,8.512554
12,Escher Wyss,47.390899,8.51536
22,Alt-Wiedikon,47.365562,8.517851
27,Langstrasse,47.377273,8.526415


In [14]:
df.drop(32, inplace=True)
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Sihlfeld,47.373218,8.51082
1,Rathaus,47.372649,8.544311
2,Oberstrass,47.385706,8.549124
3,Friesenberg,47.358174,8.493922
4,Saatlen,47.412637,8.565912
5,City,47.372943,8.535346
6,Mühlebach,47.255395,8.695773
7,Fluntern,47.376777,8.558775
8,Hottingen,47.36968,8.555082
9,Seefeld,47.357783,8.551074


In [15]:
df.sort_values('Latitude')

Unnamed: 0,Neighborhood,Latitude,Longitude
6,Mühlebach,47.255395,8.695773
31,Affoltern,47.278247,8.452152
10,Wollishofen,47.342427,8.530708
18,Weinegg,47.351058,8.56991
9,Seefeld,47.357783,8.551074
3,Friesenberg,47.358174,8.493922
17,Witikon,47.35831,8.590628
28,Enge,47.361789,8.528708
15,Hirslanden,47.362948,8.564269
22,Alt-Wiedikon,47.365562,8.517851


In [16]:
df.drop(6, inplace=True)
df.drop(31, inplace=True)
df.head()

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Sihlfeld,47.373218,8.51082
1,Rathaus,47.372649,8.544311
2,Oberstrass,47.385706,8.549124
3,Friesenberg,47.358174,8.493922
4,Saatlen,47.412637,8.565912


### Save neighborhood coordinates dataset

In [3]:
# df.to_csv('data/zurich_neighborhood_coords.csv')

# Load instead
df = pd.read_csv('data/zurich_neighborhood_coords.csv', index_col=0)
df

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Sihlfeld,47.373218,8.51082
1,Rathaus,47.372649,8.544311
2,Oberstrass,47.385706,8.549124
3,Friesenberg,47.358174,8.493922
4,Saatlen,47.412637,8.565912
5,City,47.372943,8.535346
7,Fluntern,47.376777,8.558775
8,Hottingen,47.36968,8.555082
9,Seefeld,47.357783,8.551074
10,Wollishofen,47.342427,8.530709


### Fetch neighborhood venues from Foursquare

In [4]:
venues = fetch_venues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )
venues

Unnamed: 0,Neighborhood,Latitude,Longitude,Venue,Venue Category
0,Sihlfeld,47.373218,8.510820,Bottega Berta,Italian Restaurant
1,Sihlfeld,47.373218,8.510820,Le Mezzerie,Lebanese Restaurant
2,Sihlfeld,47.373218,8.510820,Friedhof Sihlfeld,Cemetery
3,Sihlfeld,47.373218,8.510820,Tapas & Friends,Tapas Restaurant
4,Rathaus,47.372649,8.544311,Café Schober,Café
...,...,...,...,...,...
437,Wipkingen,47.393495,8.528602,Flying Pizza,Pizza Place
438,Wipkingen,47.393495,8.528602,ototo,Japanese Restaurant
439,Wipkingen,47.393495,8.528602,Bahnhof Zürich Wipkingen,Train Station
440,Wipkingen,47.393495,8.528602,Tre Frattelli,Italian Restaurant


In [5]:
venues.to_csv('data/zurich_neighborhood_venues.csv')

# Load instead
# venues = pd.read_csv('data/zurich_neighborhood_venues.csv', index_col=0)
# venues

### Find most frequent venue types per neighborhood

In [8]:
# rows sum to 1
frequency = venue_frequency(venues)
frequency

   Neighborhood  Accessories Store  American Restaurant  Antique Shop  \
0   Albisrieden                0.0                  0.0          0.00   
1  Alt-Wiedikon                0.0                  0.0          0.25   
2    Altstetten                0.0                  0.0          0.00   
3          City                0.0                  0.0          0.00   
4          Enge                0.0                  0.0          0.00   

   Argentinian Restaurant  Art Museum  Arts & Crafts Store  Asian Restaurant  \
0                     0.0    0.000000                  0.0            0.0000   
1                     0.0    0.000000                  0.0            0.0000   
2                     0.0    0.000000                  0.0            0.0625   
3                     0.0    0.029412                  0.0            0.0000   
4                     0.0    0.000000                  0.0            0.0000   

   Automotive Shop    Bakery  ...  Tibetan Restaurant  Train Station  \
0       

Unnamed: 0,Neighborhood,Accessories Store,American Restaurant,Antique Shop,Argentinian Restaurant,Art Museum,Arts & Crafts Store,Asian Restaurant,Automotive Shop,Bakery,...,Tibetan Restaurant,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Water Park,Wine Bar,Wine Shop,Women's Store,Zoo Exhibit
0,Albisrieden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,...,0.0,0.0,0.111111,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Alt-Wiedikon,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Altstetten,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,...,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,City,0.0,0.0,0.0,0.0,0.029412,0.0,0.0,0.029412,0.029412,...,0.0,0.0,0.029412,0.058824,0.029412,0.0,0.0,0.029412,0.0,0.0
4,Enge,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0
5,Escher Wyss,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Fluntern,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,...,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Friesenberg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Gewerbeschule,0.021277,0.021277,0.0,0.0,0.0,0.0,0.021277,0.0,0.042553,...,0.0,0.0,0.0,0.021277,0.021277,0.0,0.021277,0.0,0.0,0.0
9,Hard,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
frequency.to_csv('data/zurich_neighborhood_venues_frequency.csv')

In [10]:
ranked = rank_venues_by_frequency(frequency)
ranked

   Neighborhood 1st Most Common Venue 2nd Most Common Venue  \
0   Albisrieden      Swiss Restaurant                Bakery   
1  Alt-Wiedikon         Burrito Place                   Bar   
2    Altstetten           Supermarket      Swiss Restaurant   
3          City                 Hotel      Department Store   
4          Enge      Swiss Restaurant                   Bar   

  3rd Most Common Venue 4th Most Common Venue          5th Most Common Venue  \
0           Supermarket            Restaurant                           Café   
1          Antique Shop            Restaurant              French Restaurant   
2     French Restaurant                 Plaza                   Dessert Shop   
3                   Bar          Cocktail Bar  Vegetarian / Vegan Restaurant   
4             Wine Shop                 Plaza             Italian Restaurant   

  6th Most Common Venue 7th Most Common Venue 8th Most Common Venue  \
0          Tram Station         Grocery Store           Bus Station  

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Albisrieden,Swiss Restaurant,Bakery,Supermarket,Restaurant,Café,Tram Station,Grocery Store,Bus Station,Falafel Restaurant,Flea Market
1,Alt-Wiedikon,Burrito Place,Bar,Antique Shop,Restaurant,French Restaurant,Food & Drink Shop,Flea Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant
2,Altstetten,Supermarket,Swiss Restaurant,French Restaurant,Plaza,Dessert Shop,Hotel,Italian Restaurant,Mexican Restaurant,Tram Station,Asian Restaurant
3,City,Hotel,Department Store,Bar,Cocktail Bar,Vegetarian / Vegan Restaurant,Organic Grocery,Pool,Coffee Shop,Cuban Restaurant,Cupcake Shop
4,Enge,Swiss Restaurant,Bar,Wine Shop,Plaza,Italian Restaurant,Café,Zoo Exhibit,Flea Market,Fast Food Restaurant,Farmers Market
5,Escher Wyss,Hotel,Cafeteria,Business Service,Shopping Mall,Dance Studio,Cambodian Restaurant,Factory,Supermarket,Electronics Store,Light Rail Station
6,Fluntern,Bakery,Plaza,Supermarket,Gastropub,Tram Station,Flea Market,Fast Food Restaurant,Farmers Market,Falafel Restaurant,Factory
7,Friesenberg,Historic Site,Zoo Exhibit,Fried Chicken Joint,Department Store,Dessert Shop,Diner,Discount Store,Eastern European Restaurant,Electronics Store,Factory
8,Gewerbeschule,Bar,Italian Restaurant,Café,Bakery,Indian Restaurant,Hostel,Hotel,Brazilian Restaurant,Burger Joint,Restaurant
9,Hard,Plaza,Supermarket,Gas Station,Pizza Place,Dance Studio,Historic Site,Gym / Fitness Center,Hotel,Department Store,Hostel


### Save ranked venues dataset

In [11]:
ranked.to_csv('data/zurich_neighborhood_venues_ranked.csv')