<h2>Opening a Cafe in Berlin,Germany

<h3>1.Importing Libraries

In [1]:
import pandas as pd #library for data analysis
pd.set_option('display.max_columns',None)

import numpy as np  

import matplotlib.pyplot as plt

import json #library to handle JSON file

import folium #map rendering library

#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim #convert an address to latitude and longitude
import requests #library to handle rquests

from sklearn.cluster import KMeans #import KMeans from clustering

from bs4 import BeautifulSoup #library for web scrapping

<h3>2.Scrapping Data and converting to DataFrame

In [3]:
#Using the GET request
wikipedia_link='https://en.wikipedia.org/wiki/Neighborhoods_and_neighborhoods_of_Berlin'
wikipedia_page=requests.get(wikipedia_link).text
soup=BeautifulSoup(wikipedia_page,'html5lib')
print(soup.prettify())

<!DOCTYPE html>
<html class="client-nojs" dir="ltr" lang="en">
 <head>
  <meta charset="utf-8"/>
  <title>
   Neighborhoods and neighborhoods of Berlin - Wikipedia
  </title>
  <script>
   document.documentElement.className="client-js";RLCONF={"wgBreakFrames":!1,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"Xo3iOQpAMNMAAcm-59IAAAAQ","wgCSPNonce":!1,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":!1,"wgNamespaceNumber":0,"wgPageName":"Neighborhoods_and_neighborhoods_of_Berlin","wgTitle":"Neighborhoods and neighborhoods of Berlin","wgCurRevisionId":0,"wgRevisionId":0,"wgArticleId":0,"wgIsArticle":!0,"wgIsRedirect":!1,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":[],"wgPageContentLanguage":"en","wgPageContentModel":"wikitext","wgRelevantPageName":"Neighborhoods_and

In [59]:
#Extracting the HTML code
table=soup.find_all('table',{'class':'sortable wikitable'})
table

[<table border="1" cellpadding="5" cellspacing="0" class="sortable wikitable" style="float:left;">
 
 <tbody><tr>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;">Borough
 </th>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;"><a href="/wiki/Population" title="Population">Population</a> <br/><small>31 March 2010</small>
 </th>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;"><a href="/wiki/Area" title="Area">Area</a> <br/><small>in km²</small>
 </th>
 <th class="sortable" style="border-bottom:1px solid gray; vertical-align:top;"><a href="/wiki/Population_density" title="Population density">Density</a> <br/><small>per km²</small>
 </th>
 <th class="unsortable" style="border-bottom:1px solid gray; vertical-align:top;">Map
 </th></tr>
 <tr>
 <td style="border-bottom:1px solid gray;"><a href="/wiki/Charlottenburg-Wilmersdorf" title="Charlottenburg-Wilmersdorf">Charlottenburg-Wilmersdorf</a>
 </td>


In [61]:
#converting table to dataframe
df=pd.read_html(str(table[0]),index_col=None,header=None)[0]
df.head(10)

Unnamed: 0,Borough,Population 31 March 2010,Area in km²,Density per km²,Map
0,Charlottenburg-Wilmersdorf,319628,64.72,4878,
1,Friedrichshain-Kreuzberg,268225,20.16,13187,
2,Lichtenberg,259881,52.29,4952,
3,Marzahn-Hellersdorf,248264,61.74,4046,
4,Mitte,332919,39.47,8272,
5,Neukölln,310283,44.93,6804,
6,Pankow,366441,103.01,3476,
7,Reinickendorf,240454,89.46,2712,
8,Spandau,223962,91.91,2441,
9,Steglitz-Zehlendorf,293989,102.5,2818,


In [63]:
#data preprocessing
berlin=pd.DataFrame({'Neighborhood': df['Neighborhood']})
berlin.head(10)

Unnamed: 0,Neighborhood
0,Charlottenburg-Wilmersdorf
1,Friedrichshain-Kreuzberg
2,Lichtenberg
3,Marzahn-Hellersdorf
4,Mitte
5,Neukölln
6,Pankow
7,Reinickendorf
8,Spandau
9,Steglitz-Zehlendorf


In [64]:
london.shape

(533, 1)

<h3>3.Getting the geographical coordinates

In [66]:
lat=[]
long=[]
geolocator=Nominatim(user_agent='berlin_explorer')
for neigh in berlin['Neighborhood'].tolist():
    location=geolocator.geocode(neigh)
    latitude=location.latitude
    longitude=location.longitude
    lat.append(latitude)
    long.append(longitude)

In [68]:
berlin['Latitude']=lat
berlin['Longitude']=long
berlin

Unnamed: 0,Neighborhood,Latitude,Longitude
0,Charlottenburg-Wilmersdorf,52.507856,13.263952
1,Friedrichshain-Kreuzberg,52.515306,13.461612
2,Lichtenberg,48.921296,7.481227
3,Marzahn-Hellersdorf,52.522523,13.587663
4,Mitte,52.51769,13.402376
5,Neukölln,52.48115,13.43535
6,Pankow,52.597637,13.436374
7,Reinickendorf,52.604763,13.295287
8,Spandau,52.535788,13.197792
9,Steglitz-Zehlendorf,52.429205,13.229974


In [69]:
#Get the coordinates of Berlin
address='Berlin,Germany'

geolocator=Nominatim(user_agent='germany')
location=geolocator.geocode(address)
latitude=location.latitude
longitude=location.longitude
print("The latitude and longitude of {} is {} and {}".format(address,latitude,longitude))

The latitude and longitude of Berlin,Germany is 52.5170365 and 13.3888599


<h3>4.Creating the map of Berlin

In [72]:
map_ber=folium.Map(location=[lat,long],zoom_start=10)

for lat, lng, neighborhood in zip(berlin['Latitude'], berlin['Longitude'], berlin['Neighborhood']):
    label = '{}'.format(neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_ber)  
    
map_ber

In [73]:
# save the map as HTML file
map_ber.save('map_ber.html')

<h3>5.Use Foursquare API to explore Neighbourhoods

In [74]:
CLIENT_ID='3ZH0WNSMC5QCQXW3SJOH4YIJ24KHJ4ZMDP2SCEQ2UFICY1WZ'
CLIENT_SECRET='W0V1JKTAXRFUNT25N31QI0VFZQE3UWWFMJBSFPPEY5D5L23V'
VERSION='20180605'


In [79]:
radius = 2000
LIMIT = 100

venues = []

for lat, long, neighborhood in zip(berlin['Latitude'], berlin['Longitude'], berlin['Neighborhood']):
    
    # create the API request URL
    url = "https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}".format(
        CLIENT_ID,
        CLIENT_SECRET,
        VERSION,
        lat,
        long,
        radius, 
        LIMIT)
    
    # make the GET request
    results = requests.get(url).json()["response"]['groups'][0]['items']
    
    # return only relevant information for each nearby venue
    for venue in results:
        venues.append((
            neighborhood,
            lat, 
            long, 
            venue['venue']['name'], 
            venue['venue']['location']['lat'], 
            venue['venue']['location']['lng'],  
            venue['venue']['categories'][0]['name']))

<h3>6.Checking the neighborhoods

In [80]:
# convert the venues list into a DataFrame
venues_df = pd.DataFrame(venues)

# define the column names
venues_df.columns = ['Neighborhood', 'Latitude', 'Longitude', 'VenueName', 'VenueLatitude', 'VenueLongitude', 'VenueCategory']

print(venues_df.shape)
venues_df.head(10)

(751, 7)


Unnamed: 0,Neighborhood,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
0,Charlottenburg-Wilmersdorf,52.507856,13.263952,Die Wühlmäuse,52.50883,13.270733,Comedy Club
1,Charlottenburg-Wilmersdorf,52.507856,13.263952,Adik's Stehcafe,52.507889,13.258131,Café
2,Charlottenburg-Wilmersdorf,52.507856,13.263952,Rasas,52.5121,13.264464,Indian Restaurant
3,Charlottenburg-Wilmersdorf,52.507856,13.263952,Block House,52.509393,13.270958,Steakhouse
4,Charlottenburg-Wilmersdorf,52.507856,13.263952,Drachenberg,52.502594,13.249834,Mountain
5,Charlottenburg-Wilmersdorf,52.507856,13.263952,Hotel Villa Kastania,52.51031,13.268223,Hotel
6,Charlottenburg-Wilmersdorf,52.507856,13.263952,Mateo-Looi Sushi Restaurant,52.51153,13.268428,Chinese Restaurant
7,Charlottenburg-Wilmersdorf,52.507856,13.263952,Café K,52.509789,13.255227,Café
8,Charlottenburg-Wilmersdorf,52.507856,13.263952,Piccolo Mondo,52.512355,13.267806,Italian Restaurant
9,Charlottenburg-Wilmersdorf,52.507856,13.263952,Lindenwirtin,52.510335,13.271707,German Restaurant


In [81]:
#Counting the number of venues for each neighbourhood.
venues_df.groupby('Neighborhood').count()

Unnamed: 0_level_0,Latitude,Longitude,VenueName,VenueLatitude,VenueLongitude,VenueCategory
Neighborhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Charlottenburg-Wilmersdorf,100,100,100,100,100,100
Friedrichshain-Kreuzberg,100,100,100,100,100,100
Lichtenberg,3,3,3,3,3,3
Marzahn-Hellersdorf,36,36,36,36,36,36
Mitte,100,100,100,100,100,100
Neukölln,100,100,100,100,100,100
Pankow,26,26,26,26,26,26
Reinickendorf,46,46,46,46,46,46
Spandau,86,86,86,86,86,86
Steglitz-Zehlendorf,62,62,62,62,62,62


In [82]:
#finding the unique values in the dataframe
print('There are {} uniques categories.'.format(len(venues_df['VenueCategory'].unique())))

There are 193 uniques categories.


In [94]:
# print out the list of categories
venues_df['VenueCategory'][:194]

0                        Comedy Club
1                               Café
2                  Indian Restaurant
3                         Steakhouse
4                           Mountain
5                              Hotel
6                 Chinese Restaurant
7                               Café
8                 Italian Restaurant
9                  German Restaurant
10                       Supermarket
11                               Bar
12                    Scenic Lookout
13                              Café
14                              Park
15                       Flower Shop
16                   Organic Grocery
17                       Pizza Place
18                        Art Museum
19             Vietnamese Restaurant
20                  Asian Restaurant
21                           Stadium
22                      Concert Hall
23                    Soccer Stadium
24                            Garden
25                Italian Restaurant
26                              Café
2

In [87]:
# one hot encoding
ber_onehot = pd.get_dummies(venues_df[['VenueCategory']], prefix="", prefix_sep="")
ber_onehot['Neighborhoods'] = venues_df['Neighborhood'] 

# move neighborhood column to the first column
fixed_columns = [ber_onehot.columns[-1]] + list(ber_onehot.columns[:-1])
ber_onehot = ber_onehot[fixed_columns]

print(ber_onehot.shape)
ber_onehot.head()

(751, 194)


Unnamed: 0,Neighborhoods,ATM,Adult Boutique,African Restaurant,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Automotive Shop,Bagel Shop,Bakery,Bank,Bar,Bathing Area,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Boarding House,Boat Rental,Boat or Ferry,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Building,Burger Joint,Burrito Place,Bus Stop,Cable Car,Café,Canal,Castle,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Cycle Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dive Bar,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Flower Shop,Food & Drink Shop,Forest,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Go Kart Track,Gourmet Shop,Greek Restaurant,Grocery Store,Gym / Fitness Center,Halal Restaurant,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Korean Restaurant,Kumpir Restaurant,Lake,Lebanese Restaurant,Light Rail Station,Liquor Store,Lottery Retailer,Lounge,Market,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Mountain,Museum,Nail Salon,Nature Preserve,Neighborhood,Nightclub,Opera House,Optical Shop,Organic Grocery,Outdoor Sculpture,Paintball Field,Park,Persian Restaurant,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Plaza,Poke Place,Pool,Pool Hall,Post Office,Pub,Racetrack,Ramen Restaurant,Rest Area,Restaurant,River,Road,Russian Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Sports Club,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Syrian Restaurant,Tapas Restaurant,Taverna,Tea Room,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Trail,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,Charlottenburg-Wilmersdorf,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [88]:
ber_grouped = ber_onehot.groupby(["Neighborhoods"]).mean().reset_index()

print(ber_grouped.shape)
ber_grouped

(12, 194)


Unnamed: 0,Neighborhoods,ATM,Adult Boutique,African Restaurant,American Restaurant,Argentinian Restaurant,Art Gallery,Art Museum,Asian Restaurant,Athletics & Sports,Automotive Shop,Bagel Shop,Bakery,Bank,Bar,Bathing Area,Beach,Beach Bar,Beer Bar,Beer Garden,Beer Store,Big Box Store,Bike Rental / Bike Share,Bike Shop,Bistro,Boarding House,Boat Rental,Boat or Ferry,Bookstore,Bowling Alley,Brazilian Restaurant,Breakfast Spot,Brewery,Building,Burger Joint,Burrito Place,Bus Stop,Cable Car,Café,Canal,Castle,Caucasian Restaurant,Cheese Shop,Chinese Restaurant,Chocolate Shop,Climbing Gym,Clothing Store,Cocktail Bar,Coffee Shop,Comedy Club,Concert Hall,Convenience Store,Cosmetics Shop,Cupcake Shop,Cycle Studio,Deli / Bodega,Department Store,Dessert Shop,Diner,Dive Bar,Doner Restaurant,Donut Shop,Drugstore,Dumpling Restaurant,Electronics Store,Falafel Restaurant,Farmers Market,Fast Food Restaurant,Filipino Restaurant,Flower Shop,Food & Drink Shop,Forest,French Restaurant,Fried Chicken Joint,Furniture / Home Store,Garden,Garden Center,Gas Station,Gastropub,Gay Bar,General Entertainment,German Restaurant,Gift Shop,Go Kart Track,Gourmet Shop,Greek Restaurant,Grocery Store,Gym / Fitness Center,Halal Restaurant,Harbor / Marina,Hardware Store,Historic Site,History Museum,Hookah Bar,Hostel,Hot Dog Joint,Hotel,IT Services,Ice Cream Shop,Indian Restaurant,Indie Movie Theater,Indie Theater,Intersection,Italian Restaurant,Japanese Restaurant,Jazz Club,Korean Restaurant,Kumpir Restaurant,Lake,Lebanese Restaurant,Light Rail Station,Liquor Store,Lottery Retailer,Lounge,Market,Men's Store,Metro Station,Mexican Restaurant,Middle Eastern Restaurant,Mobile Phone Shop,Modern European Restaurant,Monument / Landmark,Motorcycle Shop,Mountain,Museum,Nail Salon,Nature Preserve,Neighborhood,Nightclub,Opera House,Optical Shop,Organic Grocery,Outdoor Sculpture,Paintball Field,Park,Persian Restaurant,Pet Store,Pharmacy,Pizza Place,Platform,Playground,Plaza,Poke Place,Pool,Pool Hall,Post Office,Pub,Racetrack,Ramen Restaurant,Rest Area,Restaurant,River,Road,Russian Restaurant,Salon / Barbershop,Sandwich Place,Scenic Lookout,Seafood Restaurant,Shipping Store,Shopping Mall,Skating Rink,Snack Place,Soccer Field,Soccer Stadium,Soup Place,South American Restaurant,Spa,Spanish Restaurant,Sports Club,Stadium,Stationery Store,Steakhouse,Supermarket,Sushi Restaurant,Syrian Restaurant,Tapas Restaurant,Taverna,Tea Room,Thai Restaurant,Theater,Theme Park Ride / Attraction,Theme Restaurant,Trail,Tram Station,Trattoria/Osteria,Turkish Restaurant,Vegetarian / Vegan Restaurant,Video Store,Vietnamese Restaurant,Volleyball Court,Whisky Bar,Wine Bar,Wine Shop,Yoga Studio
0,Charlottenburg-Wilmersdorf,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.02,0.0,0.0,0.0,0.02,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.08,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.07,0.0,0.01,0.03,0.0,0.0,0.0,0.06,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.01,0.01,0.0,0.02,0.0,0.01,0.03,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.03,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0
1,Friedrichshain-Kreuzberg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.0,0.0,0.06,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.13,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.05,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.04,0.0,0.01,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.04,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.09,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.01,0.02,0.01
2,Lichtenberg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Marzahn-Hellersdorf,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.055556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.194444,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.055556,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.194444,0.0,0.0,0.0,0.0,0.027778,0.0,0.0,0.027778,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Mitte,0.0,0.0,0.0,0.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.02,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.03,0.0,0.0,0.01,0.0,0.0,0.02,0.0,0.02,0.01,0.06,0.0,0.03,0.0,0.01,0.01,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.06,0.0,0.0,0.0,0.08,0.0,0.02,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.01,0.0,0.0,0.0,0.02,0.02,0.0,0.01,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.02,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.01,0.03,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.03,0.0,0.0,0.01,0.0,0.0
5,Neukölln,0.0,0.0,0.02,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.1,0.0,0.0,0.0,0.01,0.01,0.01,0.0,0.01,0.0,0.03,0.0,0.0,0.0,0.0,0.0,0.0,0.04,0.0,0.0,0.02,0.0,0.0,0.0,0.07,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.07,0.12,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.0,0.0,0.01,0.01,0.0,0.01,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.04,0.01,0.0,0.0,0.0,0.01,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.0,0.0,0.02,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,0.0,0.0,0.01,0.0,0.0,0.0,0.02,0.05,0.0,0.02,0.0,0.01,0.0,0.01,0.01
6,Pankow,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.038462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.269231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.115385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Reinickendorf,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.043478,0.021739,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.065217,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.043478,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.043478,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.021739,0.0,0.0,0.021739,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.021739,0.0,0.021739,0.0,0.021739,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.130435,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.021739,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Spandau,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.011628,0.011628,0.011628,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.011628,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.011628,0.0,0.081395,0.0,0.034884,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.023256,0.011628,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.034884,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.046512,0.0,0.011628,0.0,0.011628,0.011628,0.023256,0.011628,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.023256,0.0,0.023256,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.023256,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.034884,0.0,0.0,0.0,0.046512,0.0,0.0,0.023256,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.046512,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.011628,0.011628,0.0,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0,0.116279,0.011628,0.0,0.0,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.023256,0.0,0.0,0.0,0.011628,0.0,0.0,0.0,0.0,0.0
9,Steglitz-Zehlendorf,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.032258,0.016129,0.0,0.016129,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.112903,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.032258,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.048387,0.0,0.0,0.0,0.0,0.016129,0.016129,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.016129,0.0,0.016129,0.0,0.016129,0.0,0.0,0.112903,0.0,0.0,0.0,0.0,0.032258,0.0,0.016129,0.0,0.016129,0.0,0.0,0.0,0.016129,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.016129,0.032258,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.016129,0.016129,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.016129,0.0,0.0,0.032258,0.096774,0.016129,0.0,0.0,0.016129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.016129


In [5]:
#finiding the number of cafes in the berlin
len(ber_grouped[ber_grouped["Café"] > 0])

NameError: name 'ber_grouped' is not defined

In [107]:
ber_cafe = ber_grouped[["Neighborhoods","Café"]]
ber_cafe

Unnamed: 0,Neighborhoods,Café
0,Charlottenburg-Wilmersdorf,0.08
1,Friedrichshain-Kreuzberg,0.13
2,Lichtenberg,0.0
3,Marzahn-Hellersdorf,0.0
4,Mitte,0.03
5,Neukölln,0.07
6,Pankow,0.0
7,Reinickendorf,0.021739
8,Spandau,0.034884
9,Steglitz-Zehlendorf,0.112903


<h3>7.Using Kmeans for Clustering

In [106]:
from sklearn.cluster import KMeans
kclusters = 3

ber_clustering = cafe.drop(["Neighborhoods"], 1)

# run k-means clustering
kmeans=KMeans(n_clusters=kclusters, random_state=0).fit(ber_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_[0:10]

array([0, 2, 1, 1, 1, 0, 1, 1, 1, 2])

In [7]:
# create a new dataframe that includes the cluster as well as the top 10 venues for each neighborhood.
ber_merge = cafe.copy()

# add clustering labels
ber_merge["Cluster Labels"] = kmeans.labels_


NameError: name 'cafe' is not defined

In [113]:
ber_merge.head()

Unnamed: 0,Neighborhoods,Café,Cluster Labels
0,Charlottenburg-Wilmersdorf,0.08,0
1,Friedrichshain-Kreuzberg,0.13,2
2,Lichtenberg,0.0,1
3,Marzahn-Hellersdorf,0.0,1
4,Mitte,0.03,1


In [115]:
ber_merge['Latitude']=berlin['Latitude']
ber_merge['Longitude']=berlin['Longitude']
ber_merge

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
0,Charlottenburg-Wilmersdorf,0.08,0,52.507856,13.263952
1,Friedrichshain-Kreuzberg,0.13,2,52.515306,13.461612
2,Lichtenberg,0.0,1,48.921296,7.481227
3,Marzahn-Hellersdorf,0.0,1,52.522523,13.587663
4,Mitte,0.03,1,52.51769,13.402376
5,Neukölln,0.07,0,52.48115,13.43535
6,Pankow,0.0,1,52.597637,13.436374
7,Reinickendorf,0.021739,1,52.604763,13.295287
8,Spandau,0.034884,1,52.535788,13.197792
9,Steglitz-Zehlendorf,0.112903,2,52.429205,13.229974


<h4>Cluster 0


In [117]:
ber_merge.loc[ber_merge['Cluster Labels'] == 0]

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
0,Charlottenburg-Wilmersdorf,0.08,0,52.507856,13.263952
5,Neukölln,0.07,0,52.48115,13.43535


<h4>Cluster 1

In [118]:
ber_merge.loc[ber_merge['Cluster Labels'] == 1]

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
2,Lichtenberg,0.0,1,48.921296,7.481227
3,Marzahn-Hellersdorf,0.0,1,52.522523,13.587663
4,Mitte,0.03,1,52.51769,13.402376
6,Pankow,0.0,1,52.597637,13.436374
7,Reinickendorf,0.021739,1,52.604763,13.295287
8,Spandau,0.034884,1,52.535788,13.197792
10,Tempelhof-Schöneberg,0.028169,1,52.440603,13.373703
11,Treptow-Köpenick,0.0,1,52.417893,13.600185


<h4>Cluster 2

In [119]:
ber_merge.loc[ber_merge['Cluster Labels'] == 2]

Unnamed: 0,Neighborhoods,Café,Cluster Labels,Latitude,Longitude
1,Friedrichshain-Kreuzberg,0.13,2,52.515306,13.461612
9,Steglitz-Zehlendorf,0.112903,2,52.429205,13.229974


<h3>8.Observation

1.Neighborhoods with cluster 2 have high concentration of cafes.These neighborhoods are in the main city of Berlin which attract many tourists and will provide intense competition.<br>
2.Neighborhoods with cluster 0 have moderate concentration of cafes.Setting up a new cafe will be not as challenging as setting it in th main city<br>
3.Neighborhoods with cluster 1 have less concentration of cafes with less competion.Setting up new cafe can generate lot of customers in the neighbourhood.