<h3> 1. Libraries Import </h3>

In [180]:
import numpy as np # library to handle data in a vectorized manner

import pandas as pd # library for data analsysis
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

import json # library to handle JSON files

!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import geopy.geocoders

import requests # library to handle requests
from pandas.io.json import json_normalize # tranform JSON file into a pandas dataframe

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors

# import k-means from clustering stage
from sklearn.cluster import KMeans

#!conda install -c conda-forge folium=0.5.0 --yes # uncomment this line if you haven't completed the Foursquare API lab
import folium # map rendering library

print('Libraries imported.')

Collecting package metadata (current_repodata.json): done
Solving environment: done

# All requested packages already installed.

Libraries imported.


<h3> 2. Data Scrapping from Wikipedia into a Data Frame </h3>

In [181]:
WIKI_URL = "https://pl.wikipedia.org/wiki/Podzia%C5%82_administracyjny_Krakowa"
dfs = pd.read_html(WIKI_URL, header=0)

In [182]:
cracow_df = dfs[0]

In [183]:
cracow_df

Unnamed: 0,L.p.,Nazwa dzielnicy,Powierzchnia[ha],Liczba stałychmieszkańców,Zagęszczenie ludności [osób/km²]
0,1,Dzielnica I Stare Miasto,55676,31 359,563241
1,2,Dzielnica II Grzegórzki,58452,29 474,504243
2,3,Dzielnica III Prądnik Czerwony,64379,46 627,724258
3,4,Dzielnica IV Prądnik Biały,234187,70 647,301669
4,5,Dzielnica V Krowodrza,56190,30 223,537872
5,6,Dzielnica VI Bronowice,95596,23 678,247688
6,7,Dzielnica VII Zwierzyniec,287310,20 392,70976
7,8,Dzielnica VIII Dębniki,461887,61 637,133446
8,9,Dzielnica IX Łagiewniki-Borek Fałęcki,54151,15 259,281786
9,10,Dzielnica X Swoszowice,256040,27 493,107378


In [184]:
cracow_df = df.rename(columns={"L.p.":"Index", "Nazwa dzielnicy": "Neighborhood", "Powierzchnia[ha]": "Area", "Liczba stałychmieszkańców": "Population",
                   "Zagęszczenie ludności [osób/km²]": "Population Density"})

In [185]:
cracow_df

Unnamed: 0,Index,Neighborhood,Area,Population,Population Density
0,1,Dzielnica I Stare Miasto,55676,31 359,563241
1,2,Dzielnica II Grzegórzki,58452,29 474,504243
2,3,Dzielnica III Prądnik Czerwony,64379,46 627,724258
3,4,Dzielnica IV Prądnik Biały,234187,70 647,301669
4,5,Dzielnica V Krowodrza,56190,30 223,537872
5,6,Dzielnica VI Bronowice,95596,23 678,247688
6,7,Dzielnica VII Zwierzyniec,287310,20 392,70976
7,8,Dzielnica VIII Dębniki,461887,61 637,133446
8,9,Dzielnica IX Łagiewniki-Borek Fałęcki,54151,15 259,281786
9,10,Dzielnica X Swoszowice,256040,27 493,107378


In [186]:
cracow_df.drop(['Index', 'Area', 'Population', 'Population Density'], axis=1, inplace=True)

<h3> 3. Cracow's neighborhood geographical location data import </h3>

In [187]:
cracow_geo = pd.read_csv('cracow_geo.csv')
cracow_geo.head(6)

Unnamed: 0,Latitude,Longitude
0,50.066583,19.940139
1,50.056611,19.953278
2,50.083232,19.968844
3,50.099239,19.906304
4,50.06667,19.916667
5,50.083333,19.866667


In [188]:
cracow_new = pd.concat([cracow_df, cracow_geo], axis=1)
cracow_new

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Dzielnica I Stare Miasto,50.066583,19.940139
1,Dzielnica II Grzegórzki,50.056611,19.953278
2,Dzielnica III Prądnik Czerwony,50.083232,19.968844
3,Dzielnica IV Prądnik Biały,50.099239,19.906304
4,Dzielnica V Krowodrza,50.06667,19.916667
5,Dzielnica VI Bronowice,50.083333,19.866667
6,Dzielnica VII Zwierzyniec,50.05825,19.864922
7,Dzielnica VIII Dębniki,50.033333,19.883333
8,Dzielnica IX Łagiewniki-Borek Fałęcki,50.016472,19.921083
9,Dzielnica X Swoszowice,49.986379,19.956869


<h3> 4. Cracow map with its neighborhoods creation </h3>

In [216]:
address = 'Cracow, Poland'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Cracow are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Cracow are 50.0606742, 19.9371393.


In [218]:
# create map of Manhattan using latitude and longitude values
map_cracow = folium.Map(location=[latitude, longitude], zoom_start=11)

# add markers to map
for lat, lng, label in zip(cracow_new['Latitude'], cracow_new['Longitude'], cracow_new['Neighborhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_cracow)  
    
map_cracow

In [219]:
map_cracow.save('map_cracow.html')

<h3> 5. Neighborhoods exploration using the Foursquare API </h3>

In [190]:
CLIENT_ID = 'A0DEHD05GV3FTXZXV0KF3IEZMDSNOCMP4H4SLCDJVOGH5QKO' # your Foursquare ID
CLIENT_SECRET = 'IFWZD3EOKTN4DT1IHKW5Y5YF1X1DWG225DSRYK2MUO5O02C1' # your Foursquare Secret
VERSION = '20180605' # Foursquare API version

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: A0DEHD05GV3FTXZXV0KF3IEZMDSNOCMP4H4SLCDJVOGH5QKO
CLIENT_SECRET:IFWZD3EOKTN4DT1IHKW5Y5YF1X1DWG225DSRYK2MUO5O02C1


In [191]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(cracow_new['Latitude'], cracow_new['Longitude'], cracow_new['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

In [192]:
# convert the venues list into a new DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head()

(788, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Dzielnica I Stare Miasto,50.066583,19.940139,Mosquito Hostel,50.06643,19.939899,Hostel
1,Dzielnica I Stare Miasto,50.066583,19.940139,Farina,50.064033,19.939356,Seafood Restaurant
2,Dzielnica I Stare Miasto,50.066583,19.940139,Barbakan,50.065139,19.941417,Historic Site
3,Dzielnica I Stare Miasto,50.066583,19.940139,Społem Deluxe,50.06462,19.941961,Nightclub
4,Dzielnica I Stare Miasto,50.066583,19.940139,Wyszukane desery braci Szewczenko,50.067705,19.941118,Dessert Shop


In [193]:
venues_df.groupby(["Neighborhood"]).count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dzielnica I Stare Miasto,100,100,100,100,100,100
Dzielnica II Grzegórzki,100,100,100,100,100,100
Dzielnica III Prądnik Czerwony,71,71,71,71,71,71
Dzielnica IV Prądnik Biały,35,35,35,35,35,35
Dzielnica IX Łagiewniki-Borek Fałęcki,80,80,80,80,80,80
Dzielnica V Krowodrza,100,100,100,100,100,100
Dzielnica VI Bronowice,26,26,26,26,26,26
Dzielnica VII Zwierzyniec,19,19,19,19,19,19
Dzielnica VIII Dębniki,35,35,35,35,35,35
Dzielnica X Swoszowice,7,7,7,7,7,7


In [194]:
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 142 uniques categories.


In [196]:
# print out the list of categories
venues_df['VenueCategory'].unique()[:100]

array(['Hostel', 'Seafood Restaurant', 'Historic Site', 'Nightclub',
       'Dessert Shop', 'Hotel', 'Arts & Crafts Store', 'Ice Cream Shop',
       'Farmers Market', 'Park', 'French Restaurant', 'Tea Room',
       'Church', 'Art Gallery', 'Polish Restaurant', 'Italian Restaurant',
       'Plaza', 'Pizza Place', 'Beer Bar', 'Steakhouse', 'Burger Joint',
       'Bar', 'Indie Movie Theater', 'Juice Bar', 'Lounge', 'Café',
       'Wine Bar', 'Coffee Shop', 'Cupcake Shop', 'Bakery', 'Food Truck',
       'Vegetarian / Vegan Restaurant', 'Bookstore', 'Castle', 'Museum',
       'Movie Theater', 'Restaurant', 'Hookah Bar', 'Indian Restaurant',
       'Arcade', 'IT Services', 'Cocktail Bar', 'Hotel Bar',
       'Falafel Restaurant', 'Theater', 'Donut Shop', 'Breakfast Spot',
       'Sushi Restaurant', 'Middle Eastern Restaurant', 'Soup Place',
       'Dumpling Restaurant', 'Eastern European Restaurant', 'Pub',
       'Argentinian Restaurant', 'Street Food Gathering', 'Gym', 'Market',
       'Bi

<h3> 6. Neighborhoods Analysis </h3>

In [197]:
# one hot encoding
cracow_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")

# add neighborhood column back to dataframe
cracow_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [cracow_onehot.columns[-1]] + list(cracow_onehot.columns[:-1])
cracow_onehot = cracow_onehot[fixed_columns]

print(cracow_onehot.shape)
cracow_onehot

(788, 143)


Unnamed: 0,Neighborhoods,Airport,American Restaurant,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Athletics & Sports,Bakery,Bar,Bed & Breakfast,Beer Bar,Big Box Store,Bistro,Bookstore,Bowling Alley,Breakfast Spot,Buffet,Burger Joint,Bus Station,Bus Stop,Café,Campground,Castle,Chinese Restaurant,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Cafeteria,College Gym,College Stadium,Concert Hall,Construction & Landscaping,Convenience Store,Cupcake Shop,Department Store,Dessert Shop,Diner,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Forest,French Restaurant,Furniture / Home Store,Gas Station,Go Kart Track,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lake,Light Rail Station,Lounge,Market,Memorial Site,Middle Eastern Restaurant,Modern European Restaurant,Motel,Movie Theater,Multiplex,Museum,Nightclub,Park,Pedestrian Plaza,Pet Store,Pie Shop,Pizza Place,Platform,Playground,Plaza,Polish Restaurant,Pool,Pub,Ramen Restaurant,Rest Area,Restaurant,Rock Club,Salad Place,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Soccer Field,Soccer Stadium,Soup Place,Spa,Sporting Goods Shop,Squash Court,Stadium,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Water Park,Wine Bar,Wine Shop,Zoo
0,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,Dzielnica I Stare Miasto,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,Dzielnica I Stare Miasto,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [199]:
cracow_grouped = cracow_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(cracow_grouped.shape)
cracow_grouped

(18, 143)


Unnamed: 0,Neighborhoods,Airport,American Restaurant,Arcade,Argentinian Restaurant,Art Gallery,Art Museum,Arts & Crafts Store,Athletics & Sports,Bakery,Bar,Bed & Breakfast,Beer Bar,Big Box Store,Bistro,Bookstore,Bowling Alley,Breakfast Spot,Buffet,Burger Joint,Bus Station,Bus Stop,Café,Campground,Castle,Chinese Restaurant,Church,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,College Cafeteria,College Gym,College Stadium,Concert Hall,Construction & Landscaping,Convenience Store,Cupcake Shop,Department Store,Dessert Shop,Diner,Donut Shop,Dumpling Restaurant,Eastern European Restaurant,Electronics Store,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Flea Market,Food,Food & Drink Shop,Food Court,Food Truck,Forest,French Restaurant,Furniture / Home Store,Gas Station,Go Kart Track,Grocery Store,Gym,Gym / Fitness Center,Gym Pool,Gymnastics Gym,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Hotel Bar,Hotel Pool,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Italian Restaurant,Japanese Restaurant,Jewelry Store,Juice Bar,Kebab Restaurant,Korean Restaurant,Lake,Light Rail Station,Lounge,Market,Memorial Site,Middle Eastern Restaurant,Modern European Restaurant,Motel,Movie Theater,Multiplex,Museum,Nightclub,Park,Pedestrian Plaza,Pet Store,Pie Shop,Pizza Place,Platform,Playground,Plaza,Polish Restaurant,Pool,Pub,Ramen Restaurant,Rest Area,Restaurant,Rock Club,Salad Place,Sandwich Place,Scandinavian Restaurant,Scenic Lookout,Seafood Restaurant,Shoe Store,Shopping Mall,Skate Park,Soccer Field,Soccer Stadium,Soup Place,Spa,Sporting Goods Shop,Squash Court,Stadium,Steakhouse,Street Food Gathering,Supermarket,Sushi Restaurant,Tea Room,Thai Restaurant,Theater,Theme Park,Toy / Game Store,Train Station,Tram Station,Vegetarian / Vegan Restaurant,Vietnamese Restaurant,Warehouse Store,Water Park,Wine Bar,Wine Shop,Zoo
0,Dzielnica I Stare Miasto,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.03,0.05,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.09,0.0,0.01,0.0,0.02,0.0,0.0,0.01,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.01,0.01,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.01,0.01,0.0,0.11,0.01,0.0,0.01,0.03,0.01,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.04,0.0,0.0,0.0,0.03,0.0,0.0,0.05,0.02,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0
1,Dzielnica II Grzegórzki,0.0,0.0,0.01,0.01,0.01,0.01,0.01,0.0,0.02,0.05,0.0,0.02,0.0,0.01,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.08,0.0,0.01,0.0,0.02,0.0,0.01,0.02,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.01,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.07,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.08,0.0,0.0,0.0,0.04,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.05,0.02,0.0,0.02,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.01,0.0
2,Dzielnica III Prądnik Czerwony,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028169,0.0,0.0,0.0,0.014085,0.0,0.0,0.028169,0.0,0.014085,0.0,0.014085,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.042254,0.014085,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.042254,0.0,0.0,0.014085,0.014085,0.0,0.014085,0.056338,0.0,0.014085,0.056338,0.0,0.0,0.0,0.014085,0.0,0.028169,0.0,0.056338,0.014085,0.014085,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.056338,0.0,0.0,0.014085,0.014085,0.0,0.0,0.056338,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.028169,0.0,0.0,0.0,0.042254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028169,0.014085,0.0,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.014085,0.014085,0.0,0.0,0.0,0.084507,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.0,0.0,0.0,0.0,0.014085,0.0,0.014085,0.0
3,Dzielnica IV Prądnik Biały,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.114286,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.114286,0.0,0.0,0.0,0.057143,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.057143,0.028571,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Dzielnica IX Łagiewniki-Borek Fałęcki,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0375,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0,0.0375,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0125,0.0,0.0125,0.0,0.025,0.0,0.0125,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0,0.0,0.025,0.0,0.0,0.0375,0.025,0.0625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.075,0.0,0.0,0.0,0.025,0.0125,0.0,0.05,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0125,0.0,0.0,0.0,0.0375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0375,0.0,0.0,0.0,0.0,0.0125,0.025,0.0,0.0,0.0,0.0,0.1125,0.0125,0.0,0.0,0.0,0.0125,0.0125,0.0,0.075,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Dzielnica V Krowodrza,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.03,0.02,0.01,0.01,0.0,0.03,0.01,0.0,0.02,0.0,0.01,0.0,0.0,0.07,0.0,0.01,0.01,0.02,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.09,0.01,0.0,0.01,0.02,0.0,0.01,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.02,0.07,0.0,0.0,0.0,0.02,0.0,0.0,0.05,0.01,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0
6,Dzielnica VI Bronowice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.038462,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115385,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.0,0.0,0.115385,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Dzielnica VII Zwierzyniec,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.157895,0.0,0.0,0.0,0.0,0.0,0.0,0.105263,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.052632,0.157895,0.0,0.0,0.0,0.0,0.052632,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.052632
8,Dzielnica VIII Dębniki,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.028571,0.057143,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.028571,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.057143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.028571,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.057143,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.028571,0.0,0.0,0.0,0.0,0.0,0.057143,0.028571,0.0,0.0,0.0,0.0,0.0,0.0,0.085714,0.0,0.028571,0.0,0.0,0.0,0.0,0.0
9,Dzielnica X Swoszowice,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.0,0.0,0.142857,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [200]:
len(cracow_grouped[cracow_grouped["Hotel"] > 0])

12

In [201]:
cracow_hotel = cracow_grouped[["Neighborhoods","Hotel"]]

In [202]:
cracow_hotel

Unnamed: 0,Neighborhoods,Hotel
0,Dzielnica I Stare Miasto,0.11
1,Dzielnica II Grzegórzki,0.08
2,Dzielnica III Prądnik Czerwony,0.056338
3,Dzielnica IV Prądnik Biały,0.114286
4,Dzielnica IX Łagiewniki-Borek Fałęcki,0.075
5,Dzielnica V Krowodrza,0.09
6,Dzielnica VI Bronowice,0.115385
7,Dzielnica VII Zwierzyniec,0.157895
8,Dzielnica VIII Dębniki,0.0
9,Dzielnica X Swoszowice,0.142857


<h3> 7. Neighborhood Clustering </h3>

In [203]:
# set number of clusters
kclusters = 3

cracow_clustering = cracow_hotel.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans = KMeans(n_clusters=kclusters, random_state=0).fit(kl_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 1, 2, 1, 1, 2, 2, 0, 2], dtype=int32)

In [205]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
cracow_merged = cracow_hotel.copy()

# add clustering labels
cracow_merged["Cluster Labels"] = kmeans.labels_

In [206]:
cracow_merged.rename(columns={"Neighborhoods": "Neighborhood"}, inplace=True)
cracow_merged

Unnamed: 0,Neighborhood,Hotel,Cluster Labels
0,Dzielnica I Stare Miasto,0.11,2
1,Dzielnica II Grzegórzki,0.08,1
2,Dzielnica III Prądnik Czerwony,0.056338,1
3,Dzielnica IV Prądnik Biały,0.114286,2
4,Dzielnica IX Łagiewniki-Borek Fałęcki,0.075,1
5,Dzielnica V Krowodrza,0.09,1
6,Dzielnica VI Bronowice,0.115385,2
7,Dzielnica VII Zwierzyniec,0.157895,2
8,Dzielnica VIII Dębniki,0.0,0
9,Dzielnica X Swoszowice,0.142857,2


In [207]:
# merge cracow_grouped with cracow_data to add latitude/longitude for each neighborhood
cracow_merged = cracow_merged.join(cracow_new.set_index("Neighborhood"), on="Neighborhood")

print(cracow_merged.shape)
cracow_merged.head()

(18, 5)


Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
0,Dzielnica I Stare Miasto,0.11,2,50.066583,19.940139
1,Dzielnica II Grzegórzki,0.08,1,50.056611,19.953278
2,Dzielnica III Prądnik Czerwony,0.056338,1,50.083232,19.968844
3,Dzielnica IV Prądnik Biały,0.114286,2,50.099239,19.906304
4,Dzielnica IX Łagiewniki-Borek Fałęcki,0.075,1,50.016472,19.921083


In [210]:
# create map
map_clusters = folium.Map(location=[lat, long], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(cracow_merged['Latitude'], cracow_merged['Longitude'], cracow_merged['Neighborhood'], cracow_merged['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

In [212]:
# save the map as HTML ile
map_clusters.save('cracow_map.html')

<h3> 8. Clusters Examination </h3>

In [213]:
cracow_merged.loc[cracow_merged['Cluster Labels']==0]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
8,Dzielnica VIII Dębniki,0.0,0,50.033333,19.883333
10,Dzielnica XI Podgórze Duchackie,0.0,0,50.012276,19.964069
12,Dzielnica XIII Podgórze,0.0,0,50.041667,19.983333
13,Dzielnica XIV Czyżyny,0.06,0,50.065723,20.00879
14,Dzielnica XV Mistrzejowice,0.0,0,50.099575,20.005477
15,Dzielnica XVI Bieńczyce,0.020408,0,50.083333,20.016667
16,Dzielnica XVII Wzgórza Krzesławickie,0.0,0,50.101943,20.080496
17,Dzielnica XVIII Nowa Huta,0.0,0,50.066006,20.11439


In [214]:
cracow_merged.loc[cracow_merged['Cluster Labels']==1]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
1,Dzielnica II Grzegórzki,0.08,1,50.056611,19.953278
2,Dzielnica III Prądnik Czerwony,0.056338,1,50.083232,19.968844
4,Dzielnica IX Łagiewniki-Borek Fałęcki,0.075,1,50.016472,19.921083
5,Dzielnica V Krowodrza,0.09,1,50.06667,19.916667
11,Dzielnica XII Bieżanów-Prokocim,0.076923,1,50.016328,20.031162


In [215]:
cracow_merged.loc[cracow_merged['Cluster Labels']==2]

Unnamed: 0,Neighborhood,Hotel,Cluster Labels,Latitude,Longitude
0,Dzielnica I Stare Miasto,0.11,2,50.066583,19.940139
3,Dzielnica IV Prądnik Biały,0.114286,2,50.099239,19.906304
6,Dzielnica VI Bronowice,0.115385,2,50.083333,19.866667
7,Dzielnica VII Zwierzyniec,0.157895,2,50.05825,19.864922
9,Dzielnica X Swoszowice,0.142857,2,49.986379,19.956869


<h3> 9. Conclusion </h3>

The first cluster containing the lowest concentration of hotels (marked in red color) is located on the outskirts of the city, mostly in the east side. The second cluster contained the moderate number of hotel concentration (marked in purple) located in the center as well around the city center. The last, third cluster with the highest concentration of hotels (marked in mint green color) was located in the city center and in close proximity to the center but mostly in the northern-west part of the city. The reason for that is the proximity to the airport, hence the high number of hotels only in that part of the town.