In [1]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files
import csv
import io

#!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Libraries imported.


## 1: Scraping names of districts in Oslo from Wikipedia

In [2]:
url = 'https://no.wikipedia.org/wiki/Liste_over_Oslos_bydeler'

table = pd.read_html(url, header=0,keep_default_na=False) 

oslo_df = table[0]

oslo_df

Unnamed: 0,Bydel,Innbyggere[1],Areal i km²,Bydelsnummer
0,Alna,49 358,137,12
1,Bjerke,31 973,77,9
2,Frogner,58 283,83,5
3,Gamle Oslo,54 575,75,1
4,Grorud,27 525,82,10
5,Grünerløkka,58 906,48,2
6,Nordre Aker,50 724,136,8
7,Nordstrand,51 169,169,14
8,Sagene,43 131,31,3
9,St. Hanshaugen,38 109,36,4


## 2: Translating table into English

In [3]:
oslo_df = oslo_df.rename(index=int, columns={"Bydel": "District", "Innbyggere[1]": "Inhabitants", "Areal i km²": "Area in km²",
                                            "Bydelsnummer": "Area code"})
oslo_df

Unnamed: 0,District,Inhabitants,Area in km²,Area code
0,Alna,49 358,137,12
1,Bjerke,31 973,77,9
2,Frogner,58 283,83,5
3,Gamle Oslo,54 575,75,1
4,Grorud,27 525,82,10
5,Grünerløkka,58 906,48,2
6,Nordre Aker,50 724,136,8
7,Nordstrand,51 169,169,14
8,Sagene,43 131,31,3
9,St. Hanshaugen,38 109,36,4


## 3: Adding dd location to Districts 

In [4]:
oslo_df['Latitude'] = 0.0
oslo_df['Longitude'] = 0.0
oslo_df

Unnamed: 0,District,Inhabitants,Area in km²,Area code,Latitude,Longitude
0,Alna,49 358,137,12,0.0,0.0
1,Bjerke,31 973,77,9,0.0,0.0
2,Frogner,58 283,83,5,0.0,0.0
3,Gamle Oslo,54 575,75,1,0.0,0.0
4,Grorud,27 525,82,10,0.0,0.0
5,Grünerløkka,58 906,48,2,0.0,0.0
6,Nordre Aker,50 724,136,8,0.0,0.0
7,Nordstrand,51 169,169,14,0.0,0.0
8,Sagene,43 131,31,3,0.0,0.0
9,St. Hanshaugen,38 109,36,4,0.0,0.0


In [5]:
# Data gathered from https://latitude.to and inserted manually

oslo_df.iat[0, 4] = 59.93108 # Alna
oslo_df.iat[0, 5] = 10.87250 # Alna

oslo_df.iat[1, 4] = 59.93749625 # Bjerke
oslo_df.iat[1, 5] = 10.8083301 # Bjerke

oslo_df.iat[2, 4] = 59.91674 # Frogner
oslo_df.iat[2, 5] = 10.70684 # Frogner

oslo_df.iat[3, 4] = 59.90647 # Gamle Oslo
oslo_df.iat[3, 5] = 10.78142 # Gamle Oslo

oslo_df.iat[4, 4] = 59.95967 # Grorud
oslo_df.iat[4, 5] = 10.88268 # Grorud

oslo_df.iat[5, 4] = 59.92387 # Grünerløkka
oslo_df.iat[5, 5] = 10.75784 # Grünerløkka

oslo_df.iat[6, 4] = 59.95613 # Nordre Aker (Sogn)
oslo_df.iat[6, 5] = 10.74658 # Nordre Aker (Sogn)

oslo_df.iat[7, 4] = 59.87883 # Nordstrand
oslo_df.iat[7, 5] = 10.79080 # Nordstrand

oslo_df.iat[8, 4] = 59.93789 # Sagene
oslo_df.iat[8, 5] = 10.75592 # Sagene

oslo_df.iat[9, 4] = 59.92875 # St. Hanshaugen
oslo_df.iat[9, 5] = 10.74168 # St. Hanshaugen

oslo_df.iat[10, 4] = 59.96085 # Stovner
oslo_df.iat[10, 5] = 10.92239 # Stovner

oslo_df.iat[11, 4] = 59.82957 # Søndre Nordstrand
oslo_df.iat[11, 5] = 10.82768 # Søndre Nordstrand

oslo_df.iat[12, 4] = 59.92771 # Ullern
oslo_df.iat[12, 5] = 10.65509 # Ullern

oslo_df.iat[13, 4] = 59.94753 # Vestre Aker (Røa)
oslo_df.iat[13, 5] = 10.64248 # Vestre Aker (Røa)

oslo_df.iat[14, 4] = 59.89024 # Østensjø
oslo_df.iat[14, 5] = 10.83858 # Østensjø

oslo_df

Unnamed: 0,District,Inhabitants,Area in km²,Area code,Latitude,Longitude
0,Alna,49 358,137,12,59.93108,10.8725
1,Bjerke,31 973,77,9,59.937496,10.80833
2,Frogner,58 283,83,5,59.91674,10.70684
3,Gamle Oslo,54 575,75,1,59.90647,10.78142
4,Grorud,27 525,82,10,59.95967,10.88268
5,Grünerløkka,58 906,48,2,59.92387,10.75784
6,Nordre Aker,50 724,136,8,59.95613,10.74658
7,Nordstrand,51 169,169,14,59.87883,10.7908
8,Sagene,43 131,31,3,59.93789,10.75592
9,St. Hanshaugen,38 109,36,4,59.92875,10.74168


## Create a map of Oslo with Neigbourhoods superimposed: 

In [6]:
address = 'Oslo City'

geolocator = Nominatim(user_agent="oslo_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Oslo City are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Oslo City are 59.9126779, 10.7523904018659.


In [7]:
# create map of Oslo using latitude and longitude values
map_oslo = folium.Map(location=[latitude, longitude], zoom_start=11)


# add markers to map
for lat, lng, label in zip(oslo_df['Latitude'], oslo_df['Longitude'], oslo_df['District']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [59.956130, 10.74658],
        radius=10,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_oslo)

map_oslo

In [8]:
# create map of Oslo using latitude and longitude values
map_oslo = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(oslo_df['Latitude'], oslo_df['Longitude'], oslo_df['District']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=10,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_oslo)  
    
map_oslo

## Foursquare Credentials and Version

In [9]:
CLIENT_ID = '4NZBFUVFOURXVTTMKUF1NWN0AO4UK3SIQLUEGHNBHTACDESA' # your Foursquare ID
CLIENT_SECRET = 'VL2SNBIVN0HVPRUFAGCE2R3PTTGEOGGVJ4H4QBGYT1JJHD5T' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: 4NZBFUVFOURXVTTMKUF1NWN0AO4UK3SIQLUEGHNBHTACDESA
CLIENT_SECRET:VL2SNBIVN0HVPRUFAGCE2R3PTTGEOGGVJ4H4QBGYT1JJHD5T


## Accessing and downloading Foursquare venue data:

In [10]:
LIMIT = 100 # set limit on max number of venues

In [11]:
def getNearbyVenues(names, latitudes, longitudes, radius=2000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}&query=Food'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['District', 
                  'District Latitude', 
                  'District Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [12]:
oslo_venues = getNearbyVenues(names=oslo_df['District'],
                                   latitudes=oslo_df['Latitude'],
                                   longitudes=oslo_df['Longitude']
                                  )




Alna
Bjerke
Frogner
Gamle Oslo
Grorud
Grünerløkka
Nordre Aker
Nordstrand
Sagene
St. Hanshaugen
Stovner
Søndre Nordstrand
Ullern
Vestre Aker
Østensjø


In [13]:
print(oslo_venues.shape)

(630, 7)


In [14]:
oslo_venues.groupby('District').count()

Unnamed: 0_level_0,District Latitude,District Longitude,Venue,Venue Latitude,Venue Longitude,Venue Category
District,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Alna,6,6,6,6,6,6
Bjerke,34,34,34,34,34,34
Frogner,100,100,100,100,100,100
Gamle Oslo,100,100,100,100,100,100
Grorud,6,6,6,6,6,6
Grünerløkka,100,100,100,100,100,100
Nordre Aker,22,22,22,22,22,22
Nordstrand,11,11,11,11,11,11
Sagene,100,100,100,100,100,100
St. Hanshaugen,100,100,100,100,100,100


In [15]:
print('There are {} uniques categories.'.format(len(oslo_venues['Venue Category'].unique())))

There are 54 uniques categories.


In [16]:
# one hot encoding
oslo_onehot = pd.get_dummies(oslo_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
oslo_onehot['District'] = oslo_venues['District'] 

# move neighborhood column to the first column
fixed_columns = [oslo_onehot.columns[-1]] + list(oslo_onehot.columns[:-1])
oslo_onehot = oslo_onehot[fixed_columns]

oslo_onehot.head()

Unnamed: 0,District,American Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bistro,Breakfast Spot,Burger Joint,Burrito Place,Café,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Deli / Bodega,Diner,Dumpling Restaurant,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food Court,Food Truck,French Restaurant,Gastropub,Gluten-free Restaurant,Hot Dog Joint,Hungarian Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Noodle House,Pizza Place,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Scandinavian Restaurant,Seafood Restaurant,Snack Place,South American Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Alna,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Alna,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [17]:
oslo_onehot.shape

(630, 55)

In [18]:
oslo_grouped = oslo_onehot.groupby('District').mean().reset_index().round(4)
oslo_grouped

Unnamed: 0,District,American Restaurant,Asian Restaurant,BBQ Joint,Bakery,Bistro,Breakfast Spot,Burger Joint,Burrito Place,Café,Cantonese Restaurant,Chinese Restaurant,Comfort Food Restaurant,Deli / Bodega,Diner,Dumpling Restaurant,Eastern European Restaurant,Falafel Restaurant,Fast Food Restaurant,Food,Food Court,Food Truck,French Restaurant,Gastropub,Gluten-free Restaurant,Hot Dog Joint,Hungarian Restaurant,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Kebab Restaurant,Korean Restaurant,Mediterranean Restaurant,Mexican Restaurant,Middle Eastern Restaurant,Modern European Restaurant,Noodle House,Pizza Place,Ramen Restaurant,Restaurant,Salad Place,Sandwich Place,Scandinavian Restaurant,Seafood Restaurant,Snack Place,South American Restaurant,Spanish Restaurant,Steakhouse,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Theme Restaurant,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant
0,Alna,0.0,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bjerke,0.0,0.0882,0.0,0.1176,0.0,0.0,0.0,0.0,0.1176,0.0,0.0294,0.0,0.0,0.0,0.0,0.0,0.0,0.2059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0882,0.0,0.0,0.0294,0.0,0.0,0.0,0.0,0.0,0.0,0.1471,0.0,0.0,0.0,0.0294,0.0,0.0,0.0,0.0,0.0,0.0,0.1176,0.0,0.0294,0.0,0.0,0.0,0.0
2,Frogner,0.01,0.04,0.01,0.08,0.0,0.0,0.06,0.0,0.09,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.07,0.06,0.03,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.07,0.0,0.05,0.01,0.02,0.12,0.04,0.0,0.01,0.0,0.02,0.04,0.02,0.04,0.0,0.0,0.01,0.0
3,Gamle Oslo,0.0,0.05,0.0,0.04,0.02,0.02,0.07,0.01,0.12,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.02,0.01,0.01,0.01,0.02,0.03,0.0,0.0,0.0,0.08,0.06,0.02,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.06,0.01,0.08,0.0,0.01,0.02,0.0,0.01,0.0,0.0,0.01,0.04,0.02,0.03,0.01,0.03,0.0,0.01
4,Grorud,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1667,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.3333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Grünerløkka,0.0,0.04,0.02,0.07,0.01,0.01,0.06,0.01,0.09,0.0,0.02,0.01,0.01,0.01,0.0,0.01,0.01,0.02,0.0,0.02,0.0,0.04,0.0,0.01,0.01,0.0,0.09,0.04,0.03,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.08,0.01,0.05,0.0,0.0,0.05,0.01,0.0,0.0,0.01,0.01,0.01,0.03,0.03,0.0,0.01,0.02,0.01
6,Nordre Aker,0.0,0.0455,0.0,0.2273,0.0,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0455,0.0,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0,0.0455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0455,0.0,0.0,0.1364,0.0,0.0455,0.0,0.0,0.0,0.0,0.0455,0.0,0.0,0.0,0.1818,0.0,0.0,0.0,0.0,0.0,0.0
7,Nordstrand,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0,0.4545,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2727,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0,0.0,0.0
8,Sagene,0.0,0.04,0.01,0.13,0.0,0.01,0.03,0.0,0.13,0.0,0.01,0.01,0.02,0.02,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.02,0.02,0.01,0.01,0.01,0.06,0.03,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.08,0.0,0.03,0.0,0.01,0.03,0.01,0.0,0.0,0.01,0.01,0.1,0.01,0.04,0.0,0.0,0.0,0.02
9,St. Hanshaugen,0.0,0.03,0.03,0.08,0.0,0.01,0.07,0.01,0.1,0.0,0.01,0.01,0.02,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.04,0.01,0.01,0.01,0.0,0.07,0.05,0.03,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.05,0.01,0.06,0.0,0.01,0.06,0.02,0.0,0.0,0.01,0.01,0.03,0.04,0.02,0.0,0.0,0.02,0.0


## Creating competition indicators:

In [19]:
direct_competition = oslo_grouped[['District', 'Chinese Restaurant', ]] 
direct_competition

Unnamed: 0,District,Chinese Restaurant
0,Alna,0.0
1,Bjerke,0.0294
2,Frogner,0.02
3,Gamle Oslo,0.01
4,Grorud,0.0
5,Grünerløkka,0.02
6,Nordre Aker,0.0
7,Nordstrand,0.0
8,Sagene,0.01
9,St. Hanshaugen,0.01


In [20]:
indirect_competition = oslo_grouped[['District', 'Asian Restaurant', 'Cantonese Restaurant', 'Dumpling Restaurant', 'Indian Restaurant',
                                    'Japanese Restaurant', 'Korean Restaurant', 'Noodle House', 'Ramen Restaurant', 'Sushi Restaurant',
                                    'Thai Restaurant', 'Vietnamese Restaurant']]
indirect_competition

Unnamed: 0,District,Asian Restaurant,Cantonese Restaurant,Dumpling Restaurant,Indian Restaurant,Japanese Restaurant,Korean Restaurant,Noodle House,Ramen Restaurant,Sushi Restaurant,Thai Restaurant,Vietnamese Restaurant
0,Alna,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bjerke,0.0882,0.0,0.0,0.0882,0.0,0.0,0.0,0.0,0.1176,0.0294,0.0
2,Frogner,0.04,0.01,0.0,0.07,0.03,0.01,0.0,0.0,0.04,0.04,0.0
3,Gamle Oslo,0.05,0.0,0.01,0.08,0.02,0.0,0.0,0.01,0.04,0.03,0.01
4,Grorud,0.1667,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Grünerløkka,0.04,0.0,0.0,0.09,0.03,0.0,0.01,0.01,0.01,0.03,0.01
6,Nordre Aker,0.0455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1818,0.0,0.0
7,Nordstrand,0.0,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0909,0.0,0.0
8,Sagene,0.04,0.0,0.0,0.06,0.02,0.0,0.01,0.0,0.1,0.04,0.02
9,St. Hanshaugen,0.03,0.0,0.0,0.07,0.03,0.0,0.01,0.01,0.03,0.02,0.0


In [21]:
indirect_competition.dtypes

District                  object
Asian Restaurant         float64
Cantonese Restaurant     float64
Dumpling Restaurant      float64
Indian Restaurant        float64
Japanese Restaurant      float64
Korean Restaurant        float64
Noodle House             float64
Ramen Restaurant         float64
Sushi Restaurant         float64
Thai Restaurant          float64
Vietnamese Restaurant    float64
dtype: object

In [22]:
indirect_competition['Other Asian restaurants'] = 0.0
indirect_competition

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,District,Asian Restaurant,Cantonese Restaurant,Dumpling Restaurant,Indian Restaurant,Japanese Restaurant,Korean Restaurant,Noodle House,Ramen Restaurant,Sushi Restaurant,Thai Restaurant,Vietnamese Restaurant,Other Asian restaurants
0,Alna,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bjerke,0.0882,0.0,0.0,0.0882,0.0,0.0,0.0,0.0,0.1176,0.0294,0.0,0.0
2,Frogner,0.04,0.01,0.0,0.07,0.03,0.01,0.0,0.0,0.04,0.04,0.0,0.0
3,Gamle Oslo,0.05,0.0,0.01,0.08,0.02,0.0,0.0,0.01,0.04,0.03,0.01,0.0
4,Grorud,0.1667,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Grünerløkka,0.04,0.0,0.0,0.09,0.03,0.0,0.01,0.01,0.01,0.03,0.01,0.0
6,Nordre Aker,0.0455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1818,0.0,0.0,0.0
7,Nordstrand,0.0,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0909,0.0,0.0,0.0
8,Sagene,0.04,0.0,0.0,0.06,0.02,0.0,0.01,0.0,0.1,0.04,0.02,0.0
9,St. Hanshaugen,0.03,0.0,0.0,0.07,0.03,0.0,0.01,0.01,0.03,0.02,0.0,0.0


In [23]:
indirect_competition['Other Asian restaurants'] = indirect_competition.sum(axis=1)
indirect_competition

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,District,Asian Restaurant,Cantonese Restaurant,Dumpling Restaurant,Indian Restaurant,Japanese Restaurant,Korean Restaurant,Noodle House,Ramen Restaurant,Sushi Restaurant,Thai Restaurant,Vietnamese Restaurant,Other Asian restaurants
0,Alna,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bjerke,0.0882,0.0,0.0,0.0882,0.0,0.0,0.0,0.0,0.1176,0.0294,0.0,0.3234
2,Frogner,0.04,0.01,0.0,0.07,0.03,0.01,0.0,0.0,0.04,0.04,0.0,0.24
3,Gamle Oslo,0.05,0.0,0.01,0.08,0.02,0.0,0.0,0.01,0.04,0.03,0.01,0.25
4,Grorud,0.1667,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3334
5,Grünerløkka,0.04,0.0,0.0,0.09,0.03,0.0,0.01,0.01,0.01,0.03,0.01,0.23
6,Nordre Aker,0.0455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1818,0.0,0.0,0.2273
7,Nordstrand,0.0,0.0,0.0,0.0,0.0909,0.0,0.0,0.0,0.0909,0.0,0.0,0.1818
8,Sagene,0.04,0.0,0.0,0.06,0.02,0.0,0.01,0.0,0.1,0.04,0.02,0.29
9,St. Hanshaugen,0.03,0.0,0.0,0.07,0.03,0.0,0.01,0.01,0.03,0.02,0.0,0.2


In [24]:
competition = pd.concat([direct_competition, indirect_competition['Other Asian restaurants']], axis=1)
competition

Unnamed: 0,District,Chinese Restaurant,Other Asian restaurants
0,Alna,0.0,0.0
1,Bjerke,0.0294,0.3234
2,Frogner,0.02,0.24
3,Gamle Oslo,0.01,0.25
4,Grorud,0.0,0.3334
5,Grünerløkka,0.02,0.23
6,Nordre Aker,0.0,0.2273
7,Nordstrand,0.0,0.1818
8,Sagene,0.01,0.29
9,St. Hanshaugen,0.01,0.2


## k-means cluster analysis of restaurant venues:

In [25]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [26]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
bydel_res_venues_sorted = pd.DataFrame(columns=columns)
bydel_res_venues_sorted['District'] = oslo_grouped['District']

for ind in np.arange(oslo_grouped.shape[0]):
    bydel_res_venues_sorted.iloc[ind, 1:] = return_most_common_venues(oslo_grouped.iloc[ind, :], num_top_venues)

bydel_res_venues_sorted

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alna,Fast Food Restaurant,Bakery,Spanish Restaurant,Café,Vietnamese Restaurant,Diner,Gluten-free Restaurant,Gastropub,French Restaurant,Food Truck
1,Bjerke,Fast Food Restaurant,Pizza Place,Café,Bakery,Sushi Restaurant,Indian Restaurant,Asian Restaurant,Kebab Restaurant,Sandwich Place,Chinese Restaurant
2,Frogner,Scandinavian Restaurant,Café,Bakery,Indian Restaurant,Pizza Place,Burger Joint,Italian Restaurant,Restaurant,Sushi Restaurant,Thai Restaurant
3,Gamle Oslo,Café,Indian Restaurant,Restaurant,Burger Joint,Italian Restaurant,Pizza Place,Asian Restaurant,Sushi Restaurant,Bakery,Turkish Restaurant
4,Grorud,Pizza Place,Indian Restaurant,Asian Restaurant,Kebab Restaurant,Fast Food Restaurant,Dumpling Restaurant,Gluten-free Restaurant,Gastropub,French Restaurant,Food Truck
5,Grünerløkka,Indian Restaurant,Café,Pizza Place,Bakery,Burger Joint,Restaurant,Scandinavian Restaurant,Asian Restaurant,Italian Restaurant,French Restaurant
6,Nordre Aker,Bakery,Sushi Restaurant,Pizza Place,Café,Fast Food Restaurant,Snack Place,Asian Restaurant,Middle Eastern Restaurant,Restaurant,Deli / Bodega
7,Nordstrand,Café,Pizza Place,Japanese Restaurant,Bakery,Sushi Restaurant,Vietnamese Restaurant,Dumpling Restaurant,Gastropub,French Restaurant,Food Truck
8,Sagene,Bakery,Café,Sushi Restaurant,Pizza Place,Indian Restaurant,Asian Restaurant,Thai Restaurant,Scandinavian Restaurant,Burger Joint,Restaurant
9,St. Hanshaugen,Café,Bakery,Indian Restaurant,Burger Joint,Restaurant,Scandinavian Restaurant,Pizza Place,Italian Restaurant,Tapas Restaurant,French Restaurant


In [27]:
# set number of clusters
kclusters = 5

oslo_grouped_clustering = oslo_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(oslo_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([4, 0, 3, 3, 0, 3, 3, 2, 3, 3], dtype=int32)

In [28]:
# add clustering labels
bydel_res_venues_sorted.insert(0, 'Cluster Labels Restaurant', kmeans.labels_)

oslo_merged = oslo_df


oslo_merged = oslo_merged.join(bydel_res_venues_sorted.set_index('District'), on='District')

oslo_merged # check the last columns!

Unnamed: 0,District,Inhabitants,Area in km²,Area code,Latitude,Longitude,Cluster Labels Restaurant,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alna,49 358,137,12,59.93108,10.8725,4,Fast Food Restaurant,Bakery,Spanish Restaurant,Café,Vietnamese Restaurant,Diner,Gluten-free Restaurant,Gastropub,French Restaurant,Food Truck
1,Bjerke,31 973,77,9,59.937496,10.80833,0,Fast Food Restaurant,Pizza Place,Café,Bakery,Sushi Restaurant,Indian Restaurant,Asian Restaurant,Kebab Restaurant,Sandwich Place,Chinese Restaurant
2,Frogner,58 283,83,5,59.91674,10.70684,3,Scandinavian Restaurant,Café,Bakery,Indian Restaurant,Pizza Place,Burger Joint,Italian Restaurant,Restaurant,Sushi Restaurant,Thai Restaurant
3,Gamle Oslo,54 575,75,1,59.90647,10.78142,3,Café,Indian Restaurant,Restaurant,Burger Joint,Italian Restaurant,Pizza Place,Asian Restaurant,Sushi Restaurant,Bakery,Turkish Restaurant
4,Grorud,27 525,82,10,59.95967,10.88268,0,Pizza Place,Indian Restaurant,Asian Restaurant,Kebab Restaurant,Fast Food Restaurant,Dumpling Restaurant,Gluten-free Restaurant,Gastropub,French Restaurant,Food Truck
5,Grünerløkka,58 906,48,2,59.92387,10.75784,3,Indian Restaurant,Café,Pizza Place,Bakery,Burger Joint,Restaurant,Scandinavian Restaurant,Asian Restaurant,Italian Restaurant,French Restaurant
6,Nordre Aker,50 724,136,8,59.95613,10.74658,3,Bakery,Sushi Restaurant,Pizza Place,Café,Fast Food Restaurant,Snack Place,Asian Restaurant,Middle Eastern Restaurant,Restaurant,Deli / Bodega
7,Nordstrand,51 169,169,14,59.87883,10.7908,2,Café,Pizza Place,Japanese Restaurant,Bakery,Sushi Restaurant,Vietnamese Restaurant,Dumpling Restaurant,Gastropub,French Restaurant,Food Truck
8,Sagene,43 131,31,3,59.93789,10.75592,3,Bakery,Café,Sushi Restaurant,Pizza Place,Indian Restaurant,Asian Restaurant,Thai Restaurant,Scandinavian Restaurant,Burger Joint,Restaurant
9,St. Hanshaugen,38 109,36,4,59.92875,10.74168,3,Café,Bakery,Indian Restaurant,Burger Joint,Restaurant,Scandinavian Restaurant,Pizza Place,Italian Restaurant,Tapas Restaurant,French Restaurant


In [33]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(oslo_merged['Latitude'], oslo_merged['Longitude'], oslo_merged['District'], oslo_merged['Cluster Labels Restaurant']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

## k-means cluster analysis of venues:

In [30]:
def getNearbyVenues(names, latitudes, longitudes, radius=1000):
    
    venues_list=[]
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
            
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID, 
            CLIENT_SECRET, 
            VERSION, 
            lat, 
            lng, 
            radius, 
            LIMIT)
            
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(
            name, 
            lat, 
            lng, 
            v['venue']['name'], 
            v['venue']['location']['lat'], 
            v['venue']['location']['lng'],  
            v['venue']['categories'][0]['name']) for v in results])

    nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
    nearby_venues.columns = ['District', 
                  'District Latitude', 
                  'District Longitude', 
                  'Venue', 
                  'Venue Latitude', 
                  'Venue Longitude', 
                  'Venue Category']
    
    return(nearby_venues)

In [31]:
oslo_venues = getNearbyVenues(names=oslo_df['District'],
                                   latitudes=oslo_df['Latitude'],
                                   longitudes=oslo_df['Longitude']
                                  )


Alna
Bjerke
Frogner
Gamle Oslo
Grorud
Grünerløkka
Nordre Aker
Nordstrand
Sagene
St. Hanshaugen
Stovner
Søndre Nordstrand
Ullern
Vestre Aker
Østensjø


In [34]:
# one hot encoding
oslo_onehot = pd.get_dummies(oslo_venues[['Venue Category']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
oslo_onehot['District'] = oslo_venues['District'] 

# move neighborhood column to the first column
fixed_columns = [oslo_onehot.columns[-1]] + list(oslo_onehot.columns[:-1])
oslo_onehot = oslo_onehot[fixed_columns]

oslo_onehot.head()

Unnamed: 0,District,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Basketball Court,Beer Bar,Beer Store,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Station,Bus Stop,Café,Campground,Chinese Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dive Bar,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Farm,Fast Food Restaurant,Fish Market,Flower Shop,Food,Food Court,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Gluten-free Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Health Food Store,History Museum,Hockey Arena,Hockey Rink,Hot Dog Joint,Hotel,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Lake,Light Rail Station,Market,Martial Arts Dojo,Metro Station,Mexican Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Music Venue,Nature Preserve,Nightclub,Noodle House,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pub,Racetrack,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Ski Area,Snack Place,Soccer Field,South American Restaurant,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Theme Park,Track,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Alna,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [35]:
print(oslo_venues.shape)

(533, 7)


In [36]:
oslo_grouped = oslo_onehot.groupby('District').mean().reset_index().round(4)
oslo_grouped

Unnamed: 0,District,Art Gallery,Art Museum,Arts & Crafts Store,Asian Restaurant,Athletics & Sports,BBQ Joint,Bakery,Bar,Basketball Court,Beer Bar,Beer Store,Breakfast Spot,Brewery,Burger Joint,Burrito Place,Bus Station,Bus Stop,Café,Campground,Chinese Restaurant,Climbing Gym,Cocktail Bar,Coffee Shop,Comfort Food Restaurant,Concert Hall,Convenience Store,Dance Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dive Bar,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Farm,Fast Food Restaurant,Fish Market,Flower Shop,Food,Food Court,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Gas Station,Gastropub,Gluten-free Restaurant,Golf Course,Gourmet Shop,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Harbor / Marina,Health Food Store,History Museum,Hockey Arena,Hockey Rink,Hot Dog Joint,Hotel,Hungarian Restaurant,Ice Cream Shop,Indian Restaurant,Italian Restaurant,Japanese Restaurant,Juice Bar,Lake,Light Rail Station,Market,Martial Arts Dojo,Metro Station,Mexican Restaurant,Motorcycle Shop,Movie Theater,Moving Target,Music Venue,Nature Preserve,Nightclub,Noodle House,Park,Performing Arts Venue,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Pub,Racetrack,Ramen Restaurant,Record Shop,Rental Car Location,Restaurant,Salad Place,Salon / Barbershop,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skating Rink,Ski Area,Snack Place,Soccer Field,South American Restaurant,Spanish Restaurant,Speakeasy,Sporting Goods Shop,Sports Bar,Stadium,Steakhouse,Supermarket,Sushi Restaurant,Tapas Restaurant,Thai Restaurant,Theme Park,Track,Trail,Turkish Restaurant,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Wine Bar,Wine Shop,Yoga Studio
0,Alna,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0714,0.0,0.0714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2857,0.0714,0.0,0.0,0.0,0.0,0.0714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1429,0.0,0.0714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Bjerke,0.0,0.0,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0417,0.0,0.0833,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0833,0.0417,0.0417,0.0,0.0,0.0,0.0,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0417,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0417,0.0,0.0,0.0417,0.0,0.0,0.0,0.0833,0.0,0.0,0.0,0.0417,0.0,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0417,0.0,0.0,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0833,0.0,0.0,0.0,0.0,0.0417,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Frogner,0.0,0.0154,0.0,0.0154,0.0,0.0,0.0615,0.0154,0.0,0.0,0.0,0.0,0.0,0.0154,0.0,0.0,0.0,0.0615,0.0,0.0,0.0,0.0154,0.0154,0.0,0.0,0.0154,0.0,0.0154,0.0,0.0154,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0154,0.0,0.0,0.0,0.0,0.0,0.0,0.0308,0.0154,0.0,0.0154,0.0,0.0308,0.0,0.0154,0.0,0.0,0.0,0.0615,0.0,0.0,0.0308,0.0615,0.0154,0.0308,0.0,0.0308,0.0,0.0,0.0,0.0154,0.0,0.0154,0.0,0.0,0.0,0.0154,0.0,0.0615,0.0,0.0,0.0,0.0308,0.0154,0.0,0.0,0.0,0.0,0.0,0.0,0.0308,0.0154,0.0,0.0154,0.0769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0154,0.0,0.0,0.0,0.0,0.0,0.0154,0.0,0.0308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0308,0.0
3,Gamle Oslo,0.0,0.0,0.0,0.0,0.0,0.0,0.0328,0.082,0.0,0.0,0.0,0.0,0.0,0.0164,0.0,0.0164,0.0,0.1311,0.0164,0.0,0.0,0.0164,0.0656,0.0,0.0,0.0164,0.0,0.0,0.0,0.0,0.0164,0.0328,0.0,0.0,0.0,0.0164,0.0164,0.0,0.0,0.0164,0.0,0.0,0.0,0.0,0.0,0.0164,0.0,0.0,0.0,0.0656,0.0,0.0328,0.0,0.0,0.0,0.0,0.0164,0.0,0.0,0.0328,0.0,0.0,0.0,0.0164,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0164,0.0,0.0,0.0,0.0,0.0492,0.0,0.0,0.0,0.0328,0.0,0.0328,0.0328,0.0,0.0,0.0164,0.0,0.0164,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0164,0.0,0.0,0.0164,0.0,0.0,0.0,0.0,0.0,0.0164,0.0,0.0,0.0,0.0328,0.0,0.0164,0.0,0.0,0.0,0.0164,0.0,0.0,0.0164,0.0,0.0
4,Grorud,0.0,0.0,0.0,0.0833,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0833,0.0
5,Grünerløkka,0.01,0.0,0.01,0.02,0.0,0.02,0.02,0.09,0.0,0.02,0.02,0.0,0.03,0.03,0.01,0.0,0.0,0.06,0.0,0.0,0.0,0.05,0.07,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.03,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.05,0.01,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.02,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.01,0.01
6,Nordre Aker,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1538,0.1538,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.3077,0.0,0.0769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0769,0.0,0.0,0.0,0.0,0.0,0.0,0.0769,0.0,0.0,0.0,0.0769,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0769,0.0
7,Nordstrand,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.3125,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Sagene,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.04,0.0,0.0,0.0,0.0133,0.0267,0.0,0.0,0.0267,0.0267,0.08,0.0,0.0,0.0133,0.0133,0.0667,0.0,0.0133,0.0133,0.0,0.0267,0.0,0.0133,0.0133,0.0,0.0,0.0133,0.0,0.0,0.0,0.0133,0.0133,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0133,0.0133,0.0267,0.0,0.0,0.0,0.0133,0.0,0.0,0.0,0.0133,0.0,0.0,0.0533,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0133,0.0,0.0133,0.0,0.0933,0.0133,0.0133,0.0,0.04,0.0,0.0,0.0133,0.0,0.0,0.0,0.0,0.0267,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0133,0.0,0.0,0.0,0.0133,0.0,0.0,0.0,0.0133,0.0667,0.0,0.0133,0.0267,0.0,0.0,0.0,0.0,0.0133,0.0,0.0133,0.0
9,St. Hanshaugen,0.0,0.0,0.0,0.0,0.0,0.0,0.08,0.05,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.02,0.08,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.01,0.0,0.0,0.02,0.01,0.0,0.02,0.03,0.01,0.04,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.01,0.01,0.02,0.05,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.04,0.0,0.0,0.0,0.04,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.06,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.03,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0


In [37]:
def return_most_common_venues(row, num_top_venues):
    row_categories = row.iloc[1:]
    row_categories_sorted = row_categories.sort_values(ascending=False)
    
    return row_categories_sorted.index.values[0:num_top_venues]

In [38]:
num_top_venues = 10

indicators = ['st', 'nd', 'rd']

# create columns according to number of top venues
columns = ['District']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except:
        columns.append('{}th Most Common Venue'.format(ind+1))

# create a new dataframe
bydel_venues_sorted = pd.DataFrame(columns=columns)
bydel_venues_sorted['District'] = oslo_grouped['District']

for ind in np.arange(oslo_grouped.shape[0]):
    bydel_venues_sorted.iloc[ind, 1:] = return_most_common_venues(oslo_grouped.iloc[ind, :], num_top_venues)

bydel_venues_sorted

Unnamed: 0,District,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alna,Furniture / Home Store,Hotel,Metro Station,Gas Station,Café,Grocery Store,Motorcycle Shop,Bus Station,Electronics Store,Gym / Fitness Center
1,Bjerke,Supermarket,Café,Pizza Place,Grocery Store,Metro Station,Shopping Mall,Chinese Restaurant,Nature Preserve,Soccer Field,Bus Station
2,Frogner,Scandinavian Restaurant,Italian Restaurant,Café,Hotel,Park,Bakery,Restaurant,Indian Restaurant,Sushi Restaurant,Juice Bar
3,Gamle Oslo,Café,Bar,Coffee Shop,Grocery Store,Park,Sushi Restaurant,Pub,Plaza,Pizza Place,Dive Bar
4,Grorud,Grocery Store,Supermarket,Convenience Store,Wine Shop,Asian Restaurant,Athletics & Sports,Pizza Place,Metro Station,Gym,French Restaurant
5,Grünerløkka,Bar,Coffee Shop,Café,Park,Cocktail Bar,Burger Joint,Indian Restaurant,Italian Restaurant,Tapas Restaurant,Brewery
6,Nordre Aker,Grocery Store,Bus Station,Bus Stop,Metro Station,Hotel,Gym / Fitness Center,Wine Shop,Lake,Health Food Store,Furniture / Home Store
7,Nordstrand,Grocery Store,Bus Station,Light Rail Station,Gas Station,Pet Store,Pizza Place,Bakery,Gym / Fitness Center,Stadium,Metro Station
8,Sagene,Park,Café,Coffee Shop,Sushi Restaurant,Indian Restaurant,Bakery,Bar,Pizza Place,Theme Park,Deli / Bodega
9,St. Hanshaugen,Bakery,Coffee Shop,Scandinavian Restaurant,Indian Restaurant,Café,Bar,Gym / Fitness Center,Park,Pizza Place,Sushi Restaurant


In [39]:
# set number of clusters
kclusters = 5

oslo_grouped_clustering = oslo_grouped.drop('District', 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(oslo_grouped_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10] 

array([0, 1, 1, 1, 2, 1, 2, 2, 1, 1], dtype=int32)

In [40]:
# add clustering labels
bydel_venues_sorted.insert(0, 'Cluster Labels Venue', kmeans.labels_)

oslo_merged2 = oslo_df


oslo_merged2 = oslo_merged2.join(bydel_venues_sorted.set_index('District'), on='District')

oslo_merged2 # check the last columns!

Unnamed: 0,District,Inhabitants,Area in km²,Area code,Latitude,Longitude,Cluster Labels Venue,1st Most Common Venue,2nd Most Common Venue,3rd Most Common Venue,4th Most Common Venue,5th Most Common Venue,6th Most Common Venue,7th Most Common Venue,8th Most Common Venue,9th Most Common Venue,10th Most Common Venue
0,Alna,49 358,137,12,59.93108,10.8725,0,Furniture / Home Store,Hotel,Metro Station,Gas Station,Café,Grocery Store,Motorcycle Shop,Bus Station,Electronics Store,Gym / Fitness Center
1,Bjerke,31 973,77,9,59.937496,10.80833,1,Supermarket,Café,Pizza Place,Grocery Store,Metro Station,Shopping Mall,Chinese Restaurant,Nature Preserve,Soccer Field,Bus Station
2,Frogner,58 283,83,5,59.91674,10.70684,1,Scandinavian Restaurant,Italian Restaurant,Café,Hotel,Park,Bakery,Restaurant,Indian Restaurant,Sushi Restaurant,Juice Bar
3,Gamle Oslo,54 575,75,1,59.90647,10.78142,1,Café,Bar,Coffee Shop,Grocery Store,Park,Sushi Restaurant,Pub,Plaza,Pizza Place,Dive Bar
4,Grorud,27 525,82,10,59.95967,10.88268,2,Grocery Store,Supermarket,Convenience Store,Wine Shop,Asian Restaurant,Athletics & Sports,Pizza Place,Metro Station,Gym,French Restaurant
5,Grünerløkka,58 906,48,2,59.92387,10.75784,1,Bar,Coffee Shop,Café,Park,Cocktail Bar,Burger Joint,Indian Restaurant,Italian Restaurant,Tapas Restaurant,Brewery
6,Nordre Aker,50 724,136,8,59.95613,10.74658,2,Grocery Store,Bus Station,Bus Stop,Metro Station,Hotel,Gym / Fitness Center,Wine Shop,Lake,Health Food Store,Furniture / Home Store
7,Nordstrand,51 169,169,14,59.87883,10.7908,2,Grocery Store,Bus Station,Light Rail Station,Gas Station,Pet Store,Pizza Place,Bakery,Gym / Fitness Center,Stadium,Metro Station
8,Sagene,43 131,31,3,59.93789,10.75592,1,Park,Café,Coffee Shop,Sushi Restaurant,Indian Restaurant,Bakery,Bar,Pizza Place,Theme Park,Deli / Bodega
9,St. Hanshaugen,38 109,36,4,59.92875,10.74168,1,Bakery,Coffee Shop,Scandinavian Restaurant,Indian Restaurant,Café,Bar,Gym / Fitness Center,Park,Pizza Place,Sushi Restaurant


In [41]:
# create map
map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i + x + (i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(oslo_merged2['Latitude'], oslo_merged2['Longitude'], oslo_merged2['District'], oslo_merged2['Cluster Labels Venue']):
    label = folium.Popup(str(poi) + ' Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=10,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [42]:
Evaluation = pd.concat([competition, oslo_merged['Cluster Labels Restaurant'], oslo_merged2['Cluster Labels Venue']], axis=1)
Evaluation

Unnamed: 0,District,Chinese Restaurant,Other Asian restaurants,Cluster Labels Restaurant,Cluster Labels Venue
0,Alna,0.0,0.0,4,0
1,Bjerke,0.0294,0.3234,0,1
2,Frogner,0.02,0.24,3,1
3,Gamle Oslo,0.01,0.25,3,1
4,Grorud,0.0,0.3334,0,2
5,Grünerløkka,0.02,0.23,3,1
6,Nordre Aker,0.0,0.2273,3,2
7,Nordstrand,0.0,0.1818,2,2
8,Sagene,0.01,0.29,3,1
9,St. Hanshaugen,0.01,0.2,3,1


In [43]:
Evaluation['Cluster Labels Restaurant'].replace(3, 'similar', inplace = True)
Evaluation['Cluster Labels Restaurant'].replace(0, '', inplace = True)
Evaluation['Cluster Labels Restaurant'].replace(1, '', inplace = True)
Evaluation['Cluster Labels Restaurant'].replace(2, '', inplace = True)
Evaluation['Cluster Labels Restaurant'].replace(4, '', inplace = True)

Evaluation['Cluster Labels Venue'].replace(2, 'similar', inplace = True)
Evaluation['Cluster Labels Venue'].replace(0, '', inplace = True)
Evaluation['Cluster Labels Venue'].replace(1, '', inplace = True)
Evaluation['Cluster Labels Venue'].replace(3, '', inplace = True)
Evaluation['Cluster Labels Venue'].replace(4, '', inplace = True)

Evaluation['Chinese Restaurant'].replace(0.0000, '', inplace = True)

Evaluation

Unnamed: 0,District,Chinese Restaurant,Other Asian restaurants,Cluster Labels Restaurant,Cluster Labels Venue
0,Alna,,0.0,,
1,Bjerke,0.0294,0.3234,,
2,Frogner,0.02,0.24,similar,
3,Gamle Oslo,0.01,0.25,similar,
4,Grorud,,0.3334,,similar
5,Grünerløkka,0.02,0.23,similar,
6,Nordre Aker,,0.2273,similar,similar
7,Nordstrand,,0.1818,,similar
8,Sagene,0.01,0.29,similar,
9,St. Hanshaugen,0.01,0.2,similar,
