<h2>Opening a Cafe in Berlin,Germany

<h3>1.Importing Libraries

In [59]:
import pandas as pd #library for data analysis
pd.set_option('display.max_columns',None)

import numpy as np  

import matplotlib.pyplot as plt

import json #library to handle JSON file

import folium #map rendering library

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim #convert an address to latitude and longitude
import requests #library to handle rquests

from sklearn.cluster import KMeans #import KMeans from clustering

from bs4 import BeautifulSoup #library for web scrapping
from IPython.display import Image

import matplotlib.cm as cm
import matplotlib.colors as colors

<h3>2.Scrapping Data and converting to DataFrame

In [21]:
#Using the GET request
wikipedia_link='https://en.wikipedia.org/wiki/Boroughs_and_neighborhoods_of_Berlin'
wikipedia_page=requests.get(wikipedia_link).text
soup=BeautifulSoup(wikipedia_page,'html5lib')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Boroughs and neighborhoods of Berlin - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"XoxeagpAMNQAAqqVno8AAACG","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Boroughs_and_neighborhoods_of_Berlin","wgTitle":"Boroughs and neighborhoods of Berlin","wgCurRevisionId":933151494,"wgRevisionId":933151494,"wgArticleId":4014590,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Articles with German-language sources (de)","Webarchive template wayback links","Articles cont

In [22]:
#Extracting the HTML code
table=soup.find_all('table',{'class':'sortable wikitable'})
table

[<table border="1" cellpadding="5" cellspacing="0" class="sortable wikitable" style="float:left;">
 
 <tbody><tr>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;">Borough
 </th>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;"><a href="/wiki/Population" title="Population">Population</a> <br/><small>31 March 2010</small>
 </th>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;"><a href="/wiki/Area" title="Area">Area</a> <br/><small>in km²</small>
 </th>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;"><a href="/wiki/Population_density" title="Population density">Density</a> <br/><small>per km²</small>
 </th>
 <th class="unsortable" style="border-bottom:1px solid gray; vertical-align:top;">Map
 </th></tr>
 <tr>
 <td style="border-bottom:1px solid gray;"><a href="/wiki/Charlottenburg-Wilmersdorf" title="Charlottenburg-Wilmersdorf">Charlottenburg-Wilmersdorf</a>
 </td>


In [23]:
#converting table to dataframe
df=pd.read_html(str(table[0]),index_col=None,header=None)[0]
df.head(10)

Unnamed: 0,Borough,Population 31 March 2010,Area in km²,Density per km²,Map
0,Charlottenburg-Wilmersdorf,319628,64.72,4878,
1,Friedrichshain-Kreuzberg,268225,20.16,13187,
2,Lichtenberg,259881,52.29,4952,
3,Marzahn-Hellersdorf,248264,61.74,4046,
4,Mitte,332919,39.47,8272,
5,Neukölln,310283,44.93,6804,
6,Pankow,366441,103.01,3476,
7,Reinickendorf,240454,89.46,2712,
8,Spandau,223962,91.91,2441,
9,Steglitz-Zehlendorf,293989,102.5,2818,


In [25]:
#data preprocessing
berlin=pd.DataFrame({'Neighborhood': df['Borough']})
berlin.head(10)

Unnamed: 0,Neighborhood
0,Charlottenburg-Wilmersdorf
1,Friedrichshain-Kreuzberg
2,Lichtenberg
3,Marzahn-Hellersdorf
4,Mitte
5,Neukölln
6,Pankow
7,Reinickendorf
8,Spandau
9,Steglitz-Zehlendorf


In [26]:
berlin.shape

(12, 1)

<h3>3.Getting the geographical coordinates

In [27]:
lat=[]
long=[]
geolocator=Nominatim(user_agent='berlin_explorer')
for neigh in berlin['Neighborhood'].tolist():
    location=geolocator.geocode(neigh)
    latitude=location.latitude
    longitude=location.longitude
    lat.append(latitude)
    long.append(longitude)

In [28]:
berlin['Latitude']=lat
berlin['Longitude']=long
berlin

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Charlottenburg-Wilmersdorf,52.507856,13.263952
1,Friedrichshain-Kreuzberg,52.515306,13.461612
2,Lichtenberg,48.921296,7.481227
3,Marzahn-Hellersdorf,52.522523,13.587663
4,Mitte,52.51769,13.402376
5,Neukölln,52.48115,13.43535
6,Pankow,52.597637,13.436374
7,Reinickendorf,52.604763,13.295287
8,Spandau,52.535788,13.197792
9,Steglitz-Zehlendorf,52.429205,13.229974


In [29]:
#Get the coordinates of Berlin
address='Berlin,Germany'

geolocator=Nominatim(user_agent='germany')
location=geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print("The latitude and longitude of {} is {} and {}".format(address,latitude,longitude))

The latitude and longitude of Berlin,Germany is 52.5170365 and 13.3888599


<h3>4.Creating the map of Berlin

In [60]:
map_ber=folium.Map(location=[lat,long],zoom_start=10)

for lat, lng, neighborhood in zip(berlin['Latitude'], berlin['Longitude'], berlin['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ber)  
    
map_ber

<h3>5.Use Foursquare API to explore Neighbourhoods

In [35]:
CLIENT_ID='3ZH0WNSMC5QCQXW3SJOH4YIJ24KHJ4ZMDP2SCEQ2UFICY1WZ'
CLIENT_SECRET='W0V1JKTAXRFUNT25N31QI0VFZQE3UWWFMJBSFPPEY5D5L23V'
VERSION='20180605'


In [36]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(berlin['Latitude'], berlin['Longitude'], berlin['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

<h3>6.Checking the neighborhoods

In [37]:
# convert the venues list into a DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head(10)

(738, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Charlottenburg-Wilmersdorf,52.507856,13.263952,Die Wühlmäuse,52.50883,13.270733,Comedy Club
1,Charlottenburg-Wilmersdorf,52.507856,13.263952,Adik's Stehcafe,52.507889,13.258131,Café
2,Charlottenburg-Wilmersdorf,52.507856,13.263952,Rasas,52.5121,13.264464,Indian Restaurant
3,Charlottenburg-Wilmersdorf,52.507856,13.263952,Block House,52.509393,13.270958,Steakhouse
4,Charlottenburg-Wilmersdorf,52.507856,13.263952,Drachenberg,52.502594,13.249834,Mountain
5,Charlottenburg-Wilmersdorf,52.507856,13.263952,Hotel Villa Kastania,52.51031,13.268223,Hotel
6,Charlottenburg-Wilmersdorf,52.507856,13.263952,Mateo-Looi Sushi Restaurant,52.51153,13.268428,Chinese Restaurant
7,Charlottenburg-Wilmersdorf,52.507856,13.263952,Café K,52.509789,13.255227,Café
8,Charlottenburg-Wilmersdorf,52.507856,13.263952,Piccolo Mondo,52.512355,13.267806,Italian Restaurant
9,Charlottenburg-Wilmersdorf,52.507856,13.263952,Lindenwirtin,52.510335,13.271707,German Restaurant


In [38]:
#Counting the number of venues for each neighbourhood.
venues_df.groupby('Neighborhood').count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Charlottenburg-Wilmersdorf,100,100,100,100,100,100
Friedrichshain-Kreuzberg,100,100,100,100,100,100
Lichtenberg,3,3,3,3,3,3
Marzahn-Hellersdorf,33,33,33,33,33,33
Mitte,100,100,100,100,100,100
Neukölln,100,100,100,100,100,100
Pankow,24,24,24,24,24,24
Reinickendorf,49,49,49,49,49,49
Spandau,78,78,78,78,78,78
Steglitz-Zehlendorf,58,58,58,58,58,58


In [39]:
#finding the unique values in the dataframe
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 183 uniques categories.


In [40]:
# print out the list of categories
venues_df['VenueCategory'][:194]

0                        Comedy Club
1                               Café
2                  Indian Restaurant
3                         Steakhouse
4                           Mountain
5                              Hotel
6                 Chinese Restaurant
7                               Café
8                 Italian Restaurant
9                  German Restaurant
10                       Supermarket
11                               Bar
12                    Scenic Lookout
13                              Café
14                              Park
15                       Flower Shop
16                   Organic Grocery
17                       Pizza Place
18                        Art Museum
19             Vietnamese Restaurant
20                  Asian Restaurant
21                           Stadium
22                      Concert Hall
23                    Soccer Stadium
24                            Garden
25                Italian Restaurant
26                              Café
2

In [41]:
# one hot encoding
ber_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")
ber_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [ber_onehot.columns[-1]] + list(ber_onehot.columns[:-1])
ber_onehot = ber_onehot[fixed_columns]

print(ber_onehot.shape)
ber_onehot.head()

(738, 184)


Unnamed: 0,Neighborhoods,Adult Boutique,African Restaurant,American Restaurant,Argentinian Restaurant,Art Museum,Asian Restaurant,Athletics & Sports,Automotive Shop,Bagel Shop,Bakery,Bank,Bar,Bathing Area,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Boarding House,Boat Rental,Boat or Ferry,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Building,Burger Joint,Burrito Place,Bus Stop,Cable Car,Café,Canal,Candy Store,Castle,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Cosmetics Shop,Cupcake Shop,Currywurst Joint,Cycle Studio,Deli / Bodega,Department Store,Dessert Shop,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Flower Shop,Food & Drink Shop,Forest,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,German Restaurant,Gift Shop,Go Kart Track,Gourmet Shop,Greek Restaurant,Grocery Store,Gun Shop,Gym,Gym / Fitness Center,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Korean Restaurant,Kumpir Restaurant,Lake,Lebanese Restaurant,Light Rail Station,Liquor Store,Lottery Retailer,Lounge,Market,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Mountain,Museum,Nail Salon,Nature Preserve,Neighborhood,Opera House,Optical Shop,Organic Grocery,Outdoor Sculpture,Paintball Field,Park,Persian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Post Office,Pub,Racetrack,Ramen Restaurant,Recreation Center,Rest Area,Restaurant,River,Road,Russian Restaurant,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Spa,Spanish Restaurant,Sports Club,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Syrian Restaurant,Tapas Restaurant,Taverna,Tea Room,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
ber_grouped = ber_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(ber_grouped.shape)
ber_grouped

(12, 184)


Unnamed: 0,Neighborhoods,Adult Boutique,African Restaurant,American Restaurant,Argentinian Restaurant,Art Museum,Asian Restaurant,Athletics & Sports,Automotive Shop,Bagel Shop,Bakery,Bank,Bar,Bathing Area,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Boarding House,Boat Rental,Boat or Ferry,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Building,Burger Joint,Burrito Place,Bus Stop,Cable Car,Café,Canal,Candy Store,Castle,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Cosmetics Shop,Cupcake Shop,Currywurst Joint,Cycle Studio,Deli / Bodega,Department Store,Dessert Shop,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Event Space,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Flower Shop,Food & Drink Shop,Forest,Fried Chicken Joint,Furniture / Home Store,Garden,Gas Station,Gastropub,General Entertainment,German Restaurant,Gift Shop,Go Kart Track,Gourmet Shop,Greek Restaurant,Grocery Store,Gun Shop,Gym,Gym / Fitness Center,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Israeli Restaurant,Italian Restaurant,Japanese Restaurant,Jazz Club,Korean Restaurant,Kumpir Restaurant,Lake,Lebanese Restaurant,Light Rail Station,Liquor Store,Lottery Retailer,Lounge,Market,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Mountain,Museum,Nail Salon,Nature Preserve,Neighborhood,Opera House,Optical Shop,Organic Grocery,Outdoor Sculpture,Paintball Field,Park,Persian Restaurant,Pet Store,Pharmacy,Pizza Place,Playground,Plaza,Poke Place,Pool,Pool Hall,Post Office,Pub,Racetrack,Ramen Restaurant,Recreation Center,Rest Area,Restaurant,River,Road,Russian Restaurant,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Soup Place,Spa,Spanish Restaurant,Sports Club,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Syrian Restaurant,Tapas Restaurant,Taverna,Tea Room,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,Charlottenburg-Wilmersdorf,0.0,0.0,0.01,0.01,0.01,0.02,0.0,0.0,0.0,0.03,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.07,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.07,0.0,0.03,0.0,0.0,0.0,0.07,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.01,0.01,0.0,0.02,0.01,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
1,Friedrichshain-Kreuzberg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.14,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.05,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.04,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.09,0.01,0.01,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.02,0.01
2,Lichtenberg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Marzahn-Hellersdorf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.060606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.212121,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.030303,0.0,0.0,0.030303,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.030303,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.242424,0.0,0.0,0.0,0.0,0.030303,0.0,0.0,0.030303,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Mitte,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.01,0.01,0.06,0.0,0.03,0.02,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.05,0.0,0.0,0.0,0.08,0.02,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.01,0.02,0.02,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0
5,Neukölln,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.11,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.04,0.12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.04,0.01,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.04,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.02,0.04,0.0,0.02,0.0,0.01,0.0,0.01,0.0
6,Pankow,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.041667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Reinickendorf,0.020408,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.061224,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.020408,0.0,0.0,0.0,0.020408,0.0,0.0,0.040816,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.040816,0.020408,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.040816,0.0,0.0,0.020408,0.0,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.020408,0.0,0.020408,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.122449,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.020408,0.0
8,Spandau,0.0,0.0,0.0,0.025641,0.0,0.012821,0.012821,0.012821,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.012821,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.012821,0.0,0.064103,0.0,0.025641,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.025641,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.012821,0.0,0.0,0.0,0.038462,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.051282,0.0,0.012821,0.0,0.012821,0.012821,0.012821,0.012821,0.025641,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.025641,0.025641,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.051282,0.0,0.012821,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.051282,0.0,0.0,0.0,0.0,0.012821,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0,0.102564,0.012821,0.0,0.0,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.025641,0.0,0.0,0.0,0.012821,0.0,0.0,0.0,0.0,0.0
9,Steglitz-Zehlendorf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.034483,0.017241,0.0,0.017241,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.103448,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.034483,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051724,0.0,0.0,0.017241,0.0,0.017241,0.0,0.0,0.017241,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.017241,0.0,0.017241,0.0,0.0,0.086207,0.0,0.0,0.0,0.0,0.034483,0.0,0.034483,0.0,0.017241,0.0,0.017241,0.0,0.017241,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.017241,0.0,0.0,0.0,0.017241,0.017241,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.017241,0.0,0.0,0.034483,0.068966,0.017241,0.0,0.0,0.017241,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.017241


In [47]:
#finiding the number of cafes in the berlin
len(ber_grouped[ber_grouped["Café"] > 0])

10

In [48]:
ber_cafe = ber_grouped[["Neighborhoods","Café"]]
ber_cafe

Unnamed: 0,Neighborhoods,Café
0,Charlottenburg-Wilmersdorf,0.07
1,Friedrichshain-Kreuzberg,0.14
2,Lichtenberg,0.0
3,Marzahn-Hellersdorf,0.0
4,Mitte,0.02
5,Neukölln,0.07
6,Pankow,0.041667
7,Reinickendorf,0.040816
8,Spandau,0.025641
9,Steglitz-Zehlendorf,0.103448


<h3>7.Using Kmeans for Clustering

In [50]:
from sklearn.cluster import KMeans
kclusters = 3

ber_clustering = ber_cafe.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans=KMeans(n_clusters=kclusters, random_state=0).fit(ber_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([2, 1, 0, 0, 0, 2, 2, 2, 0, 1])

In [52]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
ber_merge = ber_cafe.copy()

# add clustering labels
ber_merge["Cluster Labels"] = kmeans.labels_


In [53]:
ber_merge.head()

Unnamed: 0,Neighborhoods,Café,Cluster Labels
0,Charlottenburg-Wilmersdorf,0.07,2
1,Friedrichshain-Kreuzberg,0.14,1
2,Lichtenberg,0.0,0
3,Marzahn-Hellersdorf,0.0,0
4,Mitte,0.02,0


In [54]:
ber_merge['Latitude']=berlin['Latitude']
ber_merge['Longitude']=berlin['Longitude']
ber_merge

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
0,Charlottenburg-Wilmersdorf,0.07,2,52.507856,13.263952
1,Friedrichshain-Kreuzberg,0.14,1,52.515306,13.461612
2,Lichtenberg,0.0,0,48.921296,7.481227
3,Marzahn-Hellersdorf,0.0,0,52.522523,13.587663
4,Mitte,0.02,0,52.51769,13.402376
5,Neukölln,0.07,2,52.48115,13.43535
6,Pankow,0.041667,2,52.597637,13.436374
7,Reinickendorf,0.040816,2,52.604763,13.295287
8,Spandau,0.025641,0,52.535788,13.197792
9,Steglitz-Zehlendorf,0.103448,1,52.429205,13.229974


In [58]:
# create map

map_clusters = folium.Map(location=[latitude, longitude], zoom_start=11)

# set color scheme for the clusters
x = np.arange(kclusters)
ys = [i+x+(i*x)**2 for i in range(kclusters)]
colors_array = cm.rainbow(np.linspace(0, 1, len(ys)))
rainbow = [colors.rgb2hex(i) for i in colors_array]

# add markers to the map
markers_colors = []
for lat, lon, poi, cluster in zip(ber_merge['Latitude'], ber_merge['Longitude'], ber_merge['Neighborhoods'], ber_merge['Cluster Labels']):
    label = folium.Popup(str(poi) + ' - Cluster ' + str(cluster), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color=rainbow[cluster-1],
        fill=True,
        fill_color=rainbow[cluster-1],
        fill_opacity=0.7).add_to(map_clusters)
       
map_clusters

<h4>Cluster 0


In [61]:
ber_merge.loc[ber_merge['Cluster Labels'] == 0]

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
2,Lichtenberg,0.0,0,48.921296,7.481227
3,Marzahn-Hellersdorf,0.0,0,52.522523,13.587663
4,Mitte,0.02,0,52.51769,13.402376
8,Spandau,0.025641,0,52.535788,13.197792
10,Tempelhof-Schöneberg,0.027778,0,52.440603,13.373703


<h4>Cluster 1

In [62]:
ber_merge.loc[ber_merge['Cluster Labels'] == 1]

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
1,Friedrichshain-Kreuzberg,0.14,1,52.515306,13.461612
9,Steglitz-Zehlendorf,0.103448,1,52.429205,13.229974


<h4>Cluster 2

In [63]:
ber_merge.loc[ber_merge['Cluster Labels'] == 2]

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
0,Charlottenburg-Wilmersdorf,0.07,2,52.507856,13.263952
5,Neukölln,0.07,2,52.48115,13.43535
6,Pankow,0.041667,2,52.597637,13.436374
7,Reinickendorf,0.040816,2,52.604763,13.295287
11,Treptow-Köpenick,0.047619,2,52.417893,13.600185


<h3>8.Observation

1.Neighborhoods with cluster 2 have high concentration of cafes.These neighborhoods are in the main city of Berlin which attract many tourists and will provide intense competition.<br>
2.Neighborhoods with cluster 0 have moderate concentration of cafes.Setting up a new cafe will be not as challenging as setting it in th main city<br>
3.Neighborhoods with cluster 1 have less concentration of cafes with less competion.Setting up new cafe can generate lot of customers in the neighbourhood.