In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda/envs/DSX-Python35:

The following NEW packages will be INSTALLED:

    altair:  2.2.2-py35_1 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge

altair-2.2.2-p 100% |################################| Time: 0:00:00  52.70 MB/s
branca-0.3.1-p 100% |################################| Time: 0:00:00  32.32 MB/s
vincent-0.4.4- 100% |################################| Time: 0:00:00  36.58 MB/s
folium-0.5.0-p 100% |################################| Time: 0:00:00  42.94 MB/s
Libraries imported.


## 1. Download and Explore Leuven NeighborhoosDataset

Neighborhood has a total of 1 boroughs and 20 neighborhoods. In order to segement the neighborhoods and explore them, we will essentially need a dataset that contains the latitude and logitude coordinates of each neighborhood. 

Luckily, this dataset exists for free on the web. Feel free to try to find this dataset on your own, but here is the link to the dataset: https://www.postalpinzipcodes.com/Postcode-BEL-Belgium-Postcode-3000-ZIP-Code

In [2]:
df1 = pd.read_html('https://www.postalpinzipcodes.com/Postcode-BEL-Belgium-Postcode-3000-ZIP-Code')[1]
print('Data downloaded!')
df1

Data downloaded!


Unnamed: 0,0,1
0,List of Near by Post Offices,
1,Post Office Name,Postal Code
2,Postcode Leuven,ZIP Code 3000
3,Postcode Leuven,ZIP Code 3001
4,Postcode Leuven,ZIP Code 3010
5,Postcode Leuven,ZIP Code 3012
6,Postcode Leuven,ZIP Code 3018
7,Postcode Leuven Heverlee,ZIP Code 3001
8,Postcode Leuven Kessel-Lo,ZIP Code 3010
9,Postcode Leuven Wijgmaal,ZIP Code 3018


In [3]:
df1 = pd.read_html('https://www.postalpinzipcodes.com/Postcode-BEL-Belgium-Postcode-3000-ZIP-Code')[1]
print('Data downloaded!')

# Data preparation

#removing text from zip code columns
df1[1].replace(regex=True,inplace=True,to_replace=r'\D',value=r'')

#removing text "Postcode"
df=df1.replace(to_replace=r'Postcode', value='', regex=True)
df = df.iloc[1:]
df = df.iloc[1:]

df = df.drop_duplicates([0])
df = df.drop_duplicates([1])

#converting data type
df[1]=pd.to_numeric(df[1])
df[1].astype(int)

#checking structure data types
df.dtypes


Data downloaded!


0    object
1     int64
dtype: object

In [4]:
df.head()

Unnamed: 0,0,1
2,Leuven,3000
7,Leuven Heverlee,3001
8,Leuven Kessel-Lo,3010
9,Leuven Wijgmaal,3018
10,Leuven Wilsele,3012


#### Tranform the data into a *pandas* dataframe

In [5]:
df.columns = ['Neighborhood','PostalCode']
df.reset_index()
df.head()

Unnamed: 0,Neighborhood,PostalCode
2,Leuven,3000
7,Leuven Heverlee,3001
8,Leuven Kessel-Lo,3010
9,Leuven Wijgmaal,3018
10,Leuven Wilsele,3012


## 2. Reading Leuven neighborhoods latitude and longitud data from CSV

In [8]:
df_g1 = pd.read_html('https://github.com/jief/zipcode-belgium/blob/master/zipcode-belgium.csv')[0]
df_g1.columns = ['index','PostalCode','City','Longitude','Latitude']
df_g = df_g1.drop_duplicates('PostalCode',keep='first', inplace=False).reset_index()
df_g.head()

Unnamed: 0,level_0,index,PostalCode,City,Longitude,Latitude
0,0,,1020,Laeken,4.348713,50.883392
1,1,,1030,Schaerbeek,4.373712,50.867604
2,2,,1040,Etterbeek,4.38951,50.836851
3,3,,1050,Ixelles,4.381571,50.822285
4,4,,1060,Saint-Gilles,4.345668,50.826741


## 3. Then let's loop through the data and fill the dataframe one row at a time.

In [9]:
#Creating arrays to collect lat and long and later insert them inton the data frame
Lat = []
Lon = []
Bor = []

for index, row in df.iterrows():
    for index2, row2 in df_g.iterrows():
        if row['PostalCode'] == row2['PostalCode']:
            Lat.append(row2['Latitude'])
            Lon.append(row2['Longitude'])
            Bor.append('Leuven')
print("Lat length:", len(Lat))
print("df shape", df.shape)

df.insert(loc=2, column='Latitude', value=Lat)
df.insert(loc=3, column='Longitude', value=Lon)
df.insert(loc=4, column='Borough', value=Bor)

df

Lat length: 12
df shape (12, 2)


Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Borough
2,Leuven,3000,50.881253,4.69299,Leuven
7,Leuven Heverlee,3001,50.851729,4.693131,Leuven
8,Leuven Kessel-Lo,3010,50.889915,4.730761,Leuven
9,Leuven Wijgmaal,3018,50.926428,4.700121,Leuven
10,Leuven Wilsele,3012,50.909536,4.713629,Leuven
11,Bierbeek Korbeek-Lo,3360,50.824124,4.771432,Leuven
12,Herent,3020,50.907933,4.672714,Leuven
14,Bertem Korbeek-Dijle,3060,50.870647,4.632291,Leuven
17,Lubbeek Linden,3210,50.898911,4.775685,Leuven
18,Oud-Heverlee Blanden,3052,50.828167,4.70568,Leuven


## 4. Let's get the geographical coordinates of Leuven.


In [10]:
#Let's get the geographical coordinates of Manhattan.

address = 'Leuven, Belgium'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Leuven City are {}, {}.'.format(latitude, longitude))
#50.8842428,4.5653441

The geograpical coordinate of Leuven City are 50.879202, 4.7011675.


In [11]:
#Reseting index
df = df[df['Borough'] == 'Leuven'].reset_index(drop=True)
df.head()

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Borough
0,Leuven,3000,50.881253,4.69299,Leuven
1,Leuven Heverlee,3001,50.851729,4.693131,Leuven
2,Leuven Kessel-Lo,3010,50.889915,4.730761,Leuven
3,Leuven Wijgmaal,3018,50.926428,4.700121,Leuven
4,Leuven Wilsele,3012,50.909536,4.713629,Leuven


In [13]:
map_leuven = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, borough, neighborhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_leuven)  

print("Map created !")
map_leuven


Map created !


## 5. Define Foursquare Credentials and Version

In [14]:
CLIENT_ID = 'XWDA1E3CTCFKLJYGMVKMANXD512J0JPA4U5BGQPPX5FPCOZH' # your Foursquare ID
CLIENT_SECRET = 'D0W5TYEBNLJGHMFFXASIF13EIQ5W5GMMGKSUPUP35AD0LIBY' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

LIMIT = 100 # limit of number of venues returned by Foursquare API
radius = 500 # define radius

Your credentails:
CLIENT_ID: XWDA1E3CTCFKLJYGMVKMANXD512J0JPA4U5BGQPPX5FPCOZH
CLIENT_SECRET:D0W5TYEBNLJGHMFFXASIF13EIQ5W5GMMGKSUPUP35AD0LIBY


In [15]:
#### Defining a function to get venues 
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['Neighborhood', 
                  'Neighborhood Latitude', 
                  'Neighborhood Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [16]:
leuven_venues = getNearbyVenues(names=df['Neighborhood'],
                                   latitudes=df['Latitude'],
                                   longitudes=df['Longitude']
                                  )

 Leuven
 Leuven Heverlee
 Leuven Kessel-Lo
 Leuven Wijgmaal
 Leuven Wilsele
 Bierbeek Korbeek-Lo
 Herent
 Bertem Korbeek-Dijle
 Lubbeek Linden
 Oud-Heverlee Blanden
 Oud-Heverlee Vaalbeek
 Holsbeek


#### Checking the resulting data frame

In [17]:
print(leuven_venues.shape)
leuven_venues

(168, 7)


Unnamed: 0,Neighborhood,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
0,Leuven,50.881253,4.69299,Bar Berlin,50.880699,4.692575,Coffee Shop
1,Leuven,50.881253,4.69299,Bakkerij Gielis,50.880424,4.695632,Bakery
2,Leuven,50.881253,4.69299,Pizzeria l'Aurora,50.88126,4.690344,Pizza Place
3,Leuven,50.881253,4.69299,Dijleterrassen,50.881423,4.69698,Plaza
4,Leuven,50.881253,4.69299,Villa de Frit,50.882946,4.693647,Friterie
5,Leuven,50.881253,4.69299,De Frittoerist,50.879483,4.690391,Friterie
6,Leuven,50.881253,4.69299,El Sombrero,50.881635,4.696864,Mexican Restaurant
7,Leuven,50.881253,4.69299,Martin's Klooster Hotel,50.879243,4.695844,Hotel
8,Leuven,50.881253,4.69299,Digame,50.88071,4.696497,Electronics Store
9,Leuven,50.881253,4.69299,Den Bruul,50.88315,4.695368,Park


## 6. Analizing data

In [18]:
#Let's check how many venues were returned for each neighborhood
leuven_venues.groupby('Neighborhood').count()

Unnamed: 0_level_0,Neighborhood Latitude,Neighborhood Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Bertem Korbeek-Dijle,6,6,6,6,6,6
Bierbeek Korbeek-Lo,6,6,6,6,6,6
Herent,31,31,31,31,31,31
Holsbeek,12,12,12,12,12,12
Leuven,61,61,61,61,61,61
Leuven Heverlee,4,4,4,4,4,4
Leuven Kessel-Lo,10,10,10,10,10,10
Leuven Wijgmaal,15,15,15,15,15,15
Leuven Wilsele,12,12,12,12,12,12
Lubbeek Linden,3,3,3,3,3,3


#### Let's find out how many unique categories can be curated from all the returned venues

In [19]:
print('There are {} uniques categories.'.format(len(leuven_venues['Venue Category'].unique())))

There are 81 uniques categories.


#### Analyze Each Neighborhood

In [20]:
# one hot encoding
leuven_onehot = pd.get_dummies(leuven_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
leuven_onehot['Neighborhood'] = leuven_venues['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [leuven_onehot.columns[-1]] + list(leuven_onehot.columns[:-1])
leuven_onehot = leuven_onehot[fixed_columns]

leuven_onehot

Unnamed: 0,Neighborhood,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Basketball Court,Bed & Breakfast,Bistro,Boarding House,Bookstore,Botanical Garden,Boutique,Bowling Alley,Brasserie,Burger Joint,Bus Stop,Café,Cheese Shop,Child Care Service,Chinese Restaurant,Clothing Store,Coffee Shop,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega,Dessert Shop,Electronics Store,Flower Shop,Forest,French Restaurant,Friterie,Gourmet Shop,Gym,Gym / Fitness Center,Health & Beauty Service,Hostel,Hotel,Indian Restaurant,Indoor Play Area,Intersection,Italian Restaurant,Jewelry Store,Kids Store,Massage Studio,Mexican Restaurant,Mini Golf,Miscellaneous Shop,Notary,Optical Shop,Organic Grocery,Outdoors & Recreation,Park,Pastry Shop,Pet Store,Pharmacy,Pie Shop,Pizza Place,Platform,Playground,Plaza,Post Office,Public Art,Restaurant,Road,Sandwich Place,Shoe Store,Snack Place,Soccer Field,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Toy / Game Store,Train Station,Turkish Restaurant,Wine Bar,Wine Shop
0,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Leuven,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Leuven,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [21]:
leuven_onehot.shape

(168, 82)

#### Next, let's group rows by neighborhood and by taking the mean of the frequency of occurrence of each category

In [22]:
leuven_grouped = leuven_onehot.groupby('Neighborhood').mean().reset_index()
leuven_grouped

Unnamed: 0,Neighborhood,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Basketball Court,Bed & Breakfast,Bistro,Boarding House,Bookstore,Botanical Garden,Boutique,Bowling Alley,Brasserie,Burger Joint,Bus Stop,Café,Cheese Shop,Child Care Service,Chinese Restaurant,Clothing Store,Coffee Shop,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega,Dessert Shop,Electronics Store,Flower Shop,Forest,French Restaurant,Friterie,Gourmet Shop,Gym,Gym / Fitness Center,Health & Beauty Service,Hostel,Hotel,Indian Restaurant,Indoor Play Area,Intersection,Italian Restaurant,Jewelry Store,Kids Store,Massage Studio,Mexican Restaurant,Mini Golf,Miscellaneous Shop,Notary,Optical Shop,Organic Grocery,Outdoors & Recreation,Park,Pastry Shop,Pet Store,Pharmacy,Pie Shop,Pizza Place,Platform,Playground,Plaza,Post Office,Public Art,Restaurant,Road,Sandwich Place,Shoe Store,Snack Place,Soccer Field,Sporting Goods Shop,Steakhouse,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Toy / Game Store,Train Station,Turkish Restaurant,Wine Bar,Wine Shop
0,Bertem Korbeek-Dijle,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667
1,Bierbeek Korbeek-Lo,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Herent,0.0,0.0,0.0,0.064516,0.032258,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.032258,0.0,0.064516,0.0,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.032258,0.032258,0.032258,0.0,0.0,0.032258,0.0,0.0,0.096774,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.0,0.032258,0.032258,0.0,0.0,0.0,0.0,0.032258,0.032258,0.0,0.0,0.032258,0.0,0.032258,0.032258,0.0,0.0,0.032258,0.0,0.0,0.0,0.0,0.032258,0.0,0.032258,0.032258,0.0,0.0,0.032258,0.0,0.032258,0.032258,0.0,0.0,0.0,0.032258,0.0,0.0,0.0
3,Holsbeek,0.0,0.083333,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.166667,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0
4,Leuven,0.016393,0.0,0.016393,0.032787,0.131148,0.0,0.0,0.016393,0.0,0.016393,0.016393,0.0,0.0,0.0,0.0,0.016393,0.0,0.016393,0.0,0.0,0.016393,0.04918,0.0,0.0,0.0,0.0,0.016393,0.016393,0.016393,0.016393,0.0,0.016393,0.032787,0.032787,0.0,0.016393,0.0,0.016393,0.016393,0.016393,0.0,0.0,0.065574,0.0,0.0,0.016393,0.016393,0.0,0.016393,0.0,0.016393,0.016393,0.0,0.032787,0.016393,0.0,0.016393,0.016393,0.032787,0.0,0.0,0.032787,0.0,0.0,0.0,0.0,0.032787,0.0,0.0,0.0,0.016393,0.0,0.032787,0.0,0.016393,0.016393,0.016393,0.0,0.0,0.016393,0.0
5,Leuven Heverlee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,Leuven Kessel-Lo,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Leuven Wijgmaal,0.0,0.066667,0.0,0.133333,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.133333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.0,0.066667,0.066667,0.0,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.066667,0.0,0.0,0.0,0.0,0.0,0.066667,0.0,0.0,0.0
8,Leuven Wilsele,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.083333,0.0,0.0,0.083333,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.083333,0.083333,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Lubbeek Linden,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Let's print each neighborhood along with the top 5 most common venues

In [23]:
num_top_venues = 5

for hood in leuven_grouped['Neighborhood']:
    print("----"+hood+"----")
    temp = leuven_grouped[leuven_grouped['Neighborhood'] == hood].T.reset_index()
    temp.columns = ['venue','freq']
    temp = temp.iloc[1:]
    temp['freq'] = temp['freq'].astype(float)
    temp = temp.round({'freq': 2})
    print(temp.sort_values('freq', ascending=False).reset_index(drop=True).head(num_top_venues))
    print('\n')

---- Bertem Korbeek-Dijle----
             venue  freq
0        Wine Shop  0.17
1  Bed & Breakfast  0.17
2         Pharmacy  0.17
3         Bus Stop  0.17
4      Post Office  0.17


---- Bierbeek Korbeek-Lo----
                        venue  freq
0             Bed & Breakfast  0.33
1                 Comedy Club  0.17
2     Health & Beauty Service  0.17
3                    Bus Stop  0.17
4  Construction & Landscaping  0.17


---- Herent----
              venue  freq
0          Friterie  0.10
1            Bakery  0.06
2          Bus Stop  0.06
3  Indoor Play Area  0.03
4          Platform  0.03


---- Holsbeek----
               venue  freq
0           Bus Stop  0.17
1       Soccer Field  0.08
2       Burger Joint  0.08
3  Convenience Store  0.08
4  French Restaurant  0.08


---- Leuven----
                venue  freq
0                 Bar  0.13
1  Italian Restaurant  0.07
2         Coffee Shop  0.05
3         Pizza Place  0.03
4            Friterie  0.03


---- Leuven Heverlee----
    

#### Let's put that into a *pandas* dataframe

First, let's write a function to sort the venues in descending order.

In [24]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [25]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = leuven_grouped['Neighborhood']

for ind in np.arange(leuven_grouped.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(leuven_grouped.iloc[ind, :], num_top_venues)

neighborhoods_venues_sorted.head(20)

Unnamed: 0,Neighborhood,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Bertem Korbeek-Dijle,Wine Shop,Bed & Breakfast,Post Office,Bus Stop,Pharmacy,Bakery,Gym,Deli / Bodega,Comedy Club,Construction & Landscaping
1,Bierbeek Korbeek-Lo,Bed & Breakfast,Comedy Club,Health & Beauty Service,Construction & Landscaping,Bus Stop,Wine Shop,Electronics Store,Convenience Store,Dance Studio,Deli / Bodega
2,Herent,Friterie,Bus Stop,Bakery,Indoor Play Area,Chinese Restaurant,Pet Store,Park,Optical Shop,Notary,Kids Store
3,Holsbeek,Bus Stop,Turkish Restaurant,Athletics & Sports,Bakery,Convenience Store,Dance Studio,Café,French Restaurant,Soccer Field,Pet Store
4,Leuven,Bar,Italian Restaurant,Coffee Shop,Park,Supermarket,Pizza Place,Plaza,Gourmet Shop,Sandwich Place,Bakery
5,Leuven Heverlee,Bus Stop,Supermarket,Boarding House,Wine Shop,Flower Shop,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega
6,Leuven Kessel-Lo,Bar,Child Care Service,Notary,Park,Plaza,Bus Stop,Road,Mini Golf,Gym,Forest
7,Leuven Wijgmaal,Bus Stop,Bakery,Steakhouse,Gym / Fitness Center,Platform,Playground,Public Art,Soccer Field,Intersection,Outdoors & Recreation
8,Leuven Wilsele,Bus Stop,Bowling Alley,Indian Restaurant,Road,Gym / Fitness Center,Basketball Court,Friterie,Supermarket,Flower Shop,Sandwich Place
9,Lubbeek Linden,Forest,Bus Stop,Wine Shop,Electronics Store,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega,Dessert Shop


#### Clustering 

In [26]:
# set number of clusters
kclusters = 5

leuven_grouped_clustering = leuven_grouped.drop('Neighborhood', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(leuven_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 4, 0, 0, 0, 3, 0, 0, 0, 1], dtype=int32)

In [27]:
# add clustering labels
neighborhoods_venues_sorted.insert(0, 'Cluster Labels', kmeans.labels_)

leuven_merged = df

# merge leuven_grouped with leuven_data to add latitude/longitude for each neighborhood
leuven_merged = leuven_merged.join(neighborhoods_venues_sorted.set_index('Neighborhood'), on='Neighborhood')

leuven_merged.head() # check the last columns!

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Leuven,3000,50.881253,4.69299,Leuven,0,Bar,Italian Restaurant,Coffee Shop,Park,Supermarket,Pizza Place,Plaza,Gourmet Shop,Sandwich Place,Bakery
1,Leuven Heverlee,3001,50.851729,4.693131,Leuven,3,Bus Stop,Supermarket,Boarding House,Wine Shop,Flower Shop,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega
2,Leuven Kessel-Lo,3010,50.889915,4.730761,Leuven,0,Bar,Child Care Service,Notary,Park,Plaza,Bus Stop,Road,Mini Golf,Gym,Forest
3,Leuven Wijgmaal,3018,50.926428,4.700121,Leuven,0,Bus Stop,Bakery,Steakhouse,Gym / Fitness Center,Platform,Playground,Public Art,Soccer Field,Intersection,Outdoors & Recreation
4,Leuven Wilsele,3012,50.909536,4.713629,Leuven,0,Bus Stop,Bowling Alley,Indian Restaurant,Road,Gym / Fitness Center,Basketball Court,Friterie,Supermarket,Flower Shop,Sandwich Place


In [28]:
leuven_merged

Unnamed: 0,Neighborhood,PostalCode,Latitude,Longitude,Borough,Cluster Labels,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Leuven,3000,50.881253,4.69299,Leuven,0,Bar,Italian Restaurant,Coffee Shop,Park,Supermarket,Pizza Place,Plaza,Gourmet Shop,Sandwich Place,Bakery
1,Leuven Heverlee,3001,50.851729,4.693131,Leuven,3,Bus Stop,Supermarket,Boarding House,Wine Shop,Flower Shop,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega
2,Leuven Kessel-Lo,3010,50.889915,4.730761,Leuven,0,Bar,Child Care Service,Notary,Park,Plaza,Bus Stop,Road,Mini Golf,Gym,Forest
3,Leuven Wijgmaal,3018,50.926428,4.700121,Leuven,0,Bus Stop,Bakery,Steakhouse,Gym / Fitness Center,Platform,Playground,Public Art,Soccer Field,Intersection,Outdoors & Recreation
4,Leuven Wilsele,3012,50.909536,4.713629,Leuven,0,Bus Stop,Bowling Alley,Indian Restaurant,Road,Gym / Fitness Center,Basketball Court,Friterie,Supermarket,Flower Shop,Sandwich Place
5,Bierbeek Korbeek-Lo,3360,50.824124,4.771432,Leuven,4,Bed & Breakfast,Comedy Club,Health & Beauty Service,Construction & Landscaping,Bus Stop,Wine Shop,Electronics Store,Convenience Store,Dance Studio,Deli / Bodega
6,Herent,3020,50.907933,4.672714,Leuven,0,Friterie,Bus Stop,Bakery,Indoor Play Area,Chinese Restaurant,Pet Store,Park,Optical Shop,Notary,Kids Store
7,Bertem Korbeek-Dijle,3060,50.870647,4.632291,Leuven,0,Wine Shop,Bed & Breakfast,Post Office,Bus Stop,Pharmacy,Bakery,Gym,Deli / Bodega,Comedy Club,Construction & Landscaping
8,Lubbeek Linden,3210,50.898911,4.775685,Leuven,1,Forest,Bus Stop,Wine Shop,Electronics Store,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega,Dessert Shop
9,Oud-Heverlee Blanden,3052,50.828167,4.70568,Leuven,0,Bar,Wine Shop,Snack Place,Bakery,Restaurant,Comedy Club,Construction & Landscaping,Convenience Store,Dance Studio,Deli / Bodega


## 7. Creating map with sorted venues in Leuven

In [29]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(leuven_merged['Latitude'], leuven_merged['Longitude'], leuven_merged['Neighborhood'], leuven_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters