In [1]:
import os
import pickle

import requests

In [2]:
def get_coordinates(api_key, address, verbose=False):
    """Geocode a free-form address with the Google Maps Geocoding API.

    Args:
        api_key: Google Maps API key.
        address: Address text to look up. It is sent as a query parameter so
            that spaces, commas and non-ASCII characters are URL-encoded.
        verbose: When true, print the decoded JSON response.

    Returns:
        ``[lat, lon]`` taken from the first result, or ``[None, None]`` when
        the request fails or the response holds no usable result.
    """
    endpoint = 'https://maps.googleapis.com/maps/api/geocode/json'
    try:
        response = requests.get(
            endpoint, params={'key': api_key, 'address': address}, timeout=10
        ).json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return [None, None]
    if verbose:
        print('Google Maps API JSON result =>', response)
    try:
        geographical_data = response['results'][0]['geometry']['location']
        return [geographical_data['lat'], geographical_data['lng']]
    except (KeyError, IndexError, TypeError):
        # No results, or a payload that does not have the expected shape.
        return [None, None]
    
address = 'Alexanderplatz, Berlin, Germany'
# The API key is read from the environment so that it is never stored in the
# notebook; set GOOGLE_API_KEY before starting the kernel.
google_api_key = os.environ.get('GOOGLE_API_KEY')
if not google_api_key:
    raise RuntimeError('Set the GOOGLE_API_KEY environment variable to a Google Maps API key.')
berlin_center = get_coordinates(google_api_key, address)
if None in berlin_center:
    # get_coordinates signals failure with [None, None]; stop here instead of
    # letting later cells fail on a missing coordinate.
    raise RuntimeError('Geocoding failed for {!r}; check the API key and network access.'.format(address))
print('Coordinate of {}: {}'.format(address, berlin_center))

Coordinate of Alexanderplatz, Berlin, Germany: [52.5219184, 13.4132147]


In [5]:
import sys
print(sys.path)
!pip install shapely
import shapely.geometry

!pip install pyproj
import pyproj

import math

def lonlat_to_xy(lon, lat):
    """Project a WGS84 longitude/latitude pair to UTM zone 33 coordinates.

    Uses ``pyproj.Transformer`` rather than the module-level
    ``pyproj.transform`` helper, which pyproj has deprecated.
    ``always_xy=True`` pins the (lon, lat) -> (x, y) argument order.

    Args:
        lon: Longitude of the point.
        lat: Latitude of the point.

    Returns:
        Tuple ``(x, y)`` in the projected UTM zone 33 system.
    """
    proj_latlon = pyproj.Proj(proj='latlong', datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    transformer = pyproj.Transformer.from_proj(proj_latlon, proj_xy, always_xy=True)
    xy = transformer.transform(lon, lat)
    return xy[0], xy[1]

def xy_to_lonlat(x, y):
    """Convert UTM zone 33 coordinates back to WGS84 longitude/latitude.

    Uses ``pyproj.Transformer`` rather than the module-level
    ``pyproj.transform`` helper, which pyproj has deprecated.
    ``always_xy=True`` pins the (x, y) -> (lon, lat) argument order.

    Args:
        x: X coordinate in the UTM zone 33 system.
        y: Y coordinate in the UTM zone 33 system.

    Returns:
        Tuple ``(lon, lat)``.
    """
    proj_latlon = pyproj.Proj(proj='latlong', datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    transformer = pyproj.Transformer.from_proj(proj_xy, proj_latlon, always_xy=True)
    lonlat = transformer.transform(x, y)
    return lonlat[0], lonlat[1]

def calc_xy_distance(x1, y1, x2, y2):
    """Return the straight-line distance between points (x1, y1) and (x2, y2)."""
    delta_x, delta_y = x2 - x1, y2 - y1
    squared_length = delta_x * delta_x + delta_y * delta_y
    return math.sqrt(squared_length)

print('Coordinate transformation check')
print('-------------------------------')
print('Berlin center longitude={}, latitude={}'.format(berlin_center[1], berlin_center[0]))
x, y = lonlat_to_xy(berlin_center[1], berlin_center[0])
print('Berlin center UTM X={}, Y={}'.format(x, y))
lo, la = xy_to_lonlat(x, y)
print('Berlin center longitude={}, latitude={}'.format(lo, la))

['', '/opt/conda/envs/Python36/lib/python36.zip', '/opt/conda/envs/Python36/lib/python3.6', '/opt/conda/envs/Python36/lib/python3.6/lib-dynload', '/home/dsxuser/.local/lib/python3.6/site-packages', '/opt/conda/envs/Python36/lib/python3.6/site-packages', '/opt/conda/envs/Python36/lib/python3.6/site-packages/IPython/extensions', '/home/dsxuser/.ipython']
Collecting shapely
[?25l  Downloading https://files.pythonhosted.org/packages/38/b6/b53f19062afd49bb5abd049aeed36f13bf8d57ef8f3fa07a5203531a0252/Shapely-1.6.4.post2-cp36-cp36m-manylinux1_x86_64.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 6.2MB/s eta 0:00:01
[?25hInstalling collected packages: shapely
Successfully installed shapely-1.6.4.post2
Collecting pyproj
[?25l  Downloading https://files.pythonhosted.org/packages/d6/70/eedc98cd52b86de24a1589c762612a98bea26cde649ffdd60c1db396cce8/pyproj-2.4.2.post1-cp36-cp36m-manylinux2010_x86_64.whl (10.1MB)
[K     |████████████████████████████████| 10.1MB 6.5MB/s eta 0:00:01
[

# Let's create a hexagonal grid of cells: we offset every other row, and adjust vertical row spacing so that every cell center is equally distant from all its neighbors.

In [6]:
# Build a hexagonal grid of candidate neighborhood centers around the city
# center, keeping only the points that lie within about 6000 of it.
berlin_center_x, berlin_center_y = lonlat_to_xy(berlin_center[1], berlin_center[0]) # City center in Cartesian coordinates

k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
# 21 columns spaced 600 apart span 12000, starting 6000 below the center x.
x_min = berlin_center_x - 6000
x_step = 600
# int(21/k) rows spaced 600*k apart cover slightly more than 12000; half of
# the excess is subtracted so the rows are centered vertically on the center.
y_min = berlin_center_y - 6000 - (int(21/k)*k*600 - 12000)/2
y_step = 600 * k 

# Parallel lists: entry i of each list describes the same candidate point.
latitudes = []
longitudes = []
distances_from_center = []
xs = []
ys = []
for i in range(0, int(21/k)):
    y = y_min + i * y_step
    # Even rows are shifted by half a column step to get the hexagonal layout.
    x_offset = 300 if i%2==0 else 0
    for j in range(0, 21):
        x = x_min + j * x_step + x_offset
        distance_from_center = calc_xy_distance(berlin_center_x, berlin_center_y, x, y)
        # 6001 rather than 6000: presumably a small tolerance for float rounding.
        if (distance_from_center <= 6001):
            lon, lat = xy_to_lonlat(x, y)
            latitudes.append(lat)
            longitudes.append(lon)
            distances_from_center.append(distance_from_center)
            xs.append(x)
            ys.append(y)

print(len(latitudes), 'candidate neighborhood centers generated.')

364 candidate neighborhood centers generated.


Let's visualize the data we have so far: city center location and candidate neighborhood centers:

In [7]:
!pip install folium

import folium

Collecting folium
[?25l  Downloading https://files.pythonhosted.org/packages/fd/a0/ccb3094026649cda4acd55bf2c3822bb8c277eb11446d13d384e5be35257/folium-0.10.1-py2.py3-none-any.whl (91kB)
[K     |████████████████████████████████| 92kB 9.2MB/s eta 0:00:01
[?25hCollecting branca>=0.3.0 (from folium)
  Downloading https://files.pythonhosted.org/packages/63/36/1c93318e9653f4e414a2e0c3b98fc898b4970e939afeedeee6075dd3b703/branca-0.3.1-py3-none-any.whl
Installing collected packages: branca, folium
Successfully installed branca-0.3.1 folium-0.10.1


In [8]:
# Map centered on the geocoded city center, with a marker on it.
map_berlin = folium.Map(location=berlin_center, zoom_start=13)
folium.Marker(berlin_center, popup='Alexanderplatz').add_to(map_berlin)
for lat, lon in zip(latitudes, longitudes):
    # Outline each candidate area with radius=300, i.e. half the 600 grid step.
    folium.Circle([lat, lon], radius=300, color='blue', fill=False).add_to(map_berlin)
# The bare expression makes the notebook render the map as the cell output.
map_berlin

Let's now use Google Maps API to get approximate addresses of those locations.

In [9]:
def get_address(api_key, latitude, longitude, verbose=False):
    """Reverse-geocode a coordinate pair with the Google Maps Geocoding API.

    Args:
        api_key: Google Maps API key.
        latitude: Latitude of the point to look up.
        longitude: Longitude of the point to look up.
        verbose: When true, print the decoded JSON response.

    Returns:
        The ``formatted_address`` of the first result, or ``None`` when the
        request fails or the response holds no usable result.
    """
    endpoint = 'https://maps.googleapis.com/maps/api/geocode/json'
    query = {'key': api_key, 'latlng': '{},{}'.format(latitude, longitude)}
    try:
        response = requests.get(endpoint, params=query, timeout=10).json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return None
    if verbose:
        print('Google Maps API JSON result =>', response)
    try:
        return response['results'][0]['formatted_address']
    except (KeyError, IndexError, TypeError):
        # No results, or a payload that does not have the expected shape.
        return None

addr = get_address(google_api_key, berlin_center[0], berlin_center[1])
print('Reverse geocoding check')
print('-----------------------')
print('Address of [{}, {}] is: {}'.format(berlin_center[0], berlin_center[1], addr))

Reverse geocoding check
-----------------------
Address of [52.5219184, 13.4132147] is: U Alexanderplatz, 10178 Berlin, Germany


In [10]:
# Reverse-geocode every candidate center; one dot is printed per lookup.
print('Obtaining location addresses: ', end='')
addresses = []
for lat, lon in zip(latitudes, longitudes):
    address = get_address(google_api_key, lat, lon)
    # Failed lookups get a placeholder; the country suffix is not needed.
    address = ('NO ADDRESS' if address is None else address).replace(', Germany', '')
    addresses.append(address)
    print(' .', end='')
print(' done.')

Obtaining location addresses:  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [11]:
addresses[150:170]

['Frankfurter Allee 147-149, 10365 Berlin',
 'Magdalenenstraße 12, 10365 Berlin',
 'Siegfriedstraße 207, 10365 Berlin',
 'Wilhelm-Caspar-Wegely-Platz 7, 10587 Berlin',
 'Händelallee 51, 10557 Berlin',
 'Spreeweg 1, 10557 Berlin',
 'John-Foster-Dulles-Allee 10, 10557 Berlin',
 'B96, 10557 Berlin',
 'Pariser Platz 6a, 10117 Berlin',
 'B2 38, 10117 Berlin',
 'Unter den Linden 5, 10117 Berlin',
 'Spreeufer 6, 10178 Berlin',
 'Waisenstraße 24, 10179 Berlin',
 'Neue Blumenstraße 1, 10179 Berlin',
 'Blumenstraße 47, 10243 Berlin',
 'Karl-Marx-Allee 89, 10243 Berlin',
 'Weidenweg 27, 10249 Berlin',
 'Rigaer Str. 96, 10247 Berlin',
 'Bänschstraße 58, 10247 Berlin',
 'Parkaue 30, 10367 Berlin']

Looking good. Let's now place all this into a Pandas dataframe.

In [12]:
import pandas as pd

# One row per candidate center; the lists are parallel, so row i combines
# entry i of each of them.
location_columns = {
    'Address': addresses,
    'Latitude': latitudes,
    'Longitude': longitudes,
    'X': xs,
    'Y': ys,
    'Distance from center': distances_from_center,
}
df_locations = pd.DataFrame(location_columns)

df_locations.head(10)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center
0,"Ringbahnstraße 50, 12099 Berlin",52.470194,13.388575,390541.280176,5814557.0,5992.495307
1,"09R/27L, 12101 Berlin",52.470314,13.397404,391141.280176,5814557.0,5840.3767
2,"09R/27L, 12049 Berlin",52.470434,13.406234,391741.280176,5814557.0,5747.173218
3,"Oderstraße 174, 12049 Berlin",52.470552,13.415063,392341.280176,5814557.0,5715.767665
4,"Warthestraße 23A, 12051 Berlin",52.47067,13.423893,392941.280176,5814557.0,5747.173218
5,"Altenbraker Str. 15, 12053 Berlin",52.470788,13.432722,393541.280176,5814557.0,5840.3767
6,"Karl-Marx-Straße 212, 12055 Berlin",52.470904,13.441552,394141.280176,5814557.0,5992.495307
7,"Hessenring 34, 12101 Berlin",52.474683,13.375159,389641.280176,5815077.0,5855.766389
8,"Kleineweg 125, 12101 Berlin",52.474804,13.383989,390241.280176,5815077.0,5604.462508
9,"09L/27R, 12101 Berlin",52.474924,13.39282,390841.280176,5815077.0,5408.326913


...and let's now save/persist this data into local file.

In [13]:
df_locations.to_pickle('./locations.pkl')

In [14]:
# Foursquare credentials are read from the environment so that they are never
# stored in the notebook source or echoed into its saved output.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID', '') # your Foursquare ID
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET', '') # your Foursquare Secret
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError('Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET in the environment.')
VERSION = '20180604'
LIMIT = 30
print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
# Only confirm that a secret is configured; never print its value.
print('CLIENT_SECRET:' + '*' * 8)

Your credentails:
CLIENT_ID: PFWOPKA0A5PWHANBLR2ZDOGST3EDMEPDH2C4UUDVKKW51JFD
CLIENT_SECRET:3BZX1XUJ4EBPR0DSDIR1XULMPHEL4XOUR4EGTKP0GY44OKFF


In [17]:
# Category IDs corresponding to Italian restaurants were taken from Foursquare web site (https://developer.foursquare.com/docs/resources/categories):

food_category = '4d4b7105d754a06374d81259' # 'Root' category for all food-related venues
# Credentials come from the environment instead of being hard-coded in the notebook.
foursquare_client_id = os.environ.get('FOURSQUARE_CLIENT_ID', '')
foursquare_client_secret = os.environ.get('FOURSQUARE_CLIENT_SECRET', '')
if not foursquare_client_id or not foursquare_client_secret:
    raise RuntimeError('Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET in the environment.')

italian_restaurant_categories = ['4bf58dd8d48988d110941735','55a5a1ebe4b013909087cbb6','55a5a1ebe4b013909087cb7c',
                                 '55a5a1ebe4b013909087cba7','55a5a1ebe4b013909087cba1','55a5a1ebe4b013909087cba4',
                                 '55a5a1ebe4b013909087cb95','55a5a1ebe4b013909087cb89','55a5a1ebe4b013909087cb9b',
                                 '55a5a1ebe4b013909087cb98','55a5a1ebe4b013909087cbbf','55a5a1ebe4b013909087cb79',
                                 '55a5a1ebe4b013909087cbb0','55a5a1ebe4b013909087cbb3','55a5a1ebe4b013909087cb74',
                                 '55a5a1ebe4b013909087cbaa','55a5a1ebe4b013909087cb83','55a5a1ebe4b013909087cb8c',
                                 '55a5a1ebe4b013909087cb92','55a5a1ebe4b013909087cb8f','55a5a1ebe4b013909087cb86',
                                 '55a5a1ebe4b013909087cbb9','55a5a1ebe4b013909087cb7f','55a5a1ebe4b013909087cbbc',
                                 '55a5a1ebe4b013909087cb9e','55a5a1ebe4b013909087cbc2','55a5a1ebe4b013909087cbad']

def is_restaurant(categories, specific_filter=None):
    """Classify a venue from its ``(name, id)`` category pairs.

    Categories are processed in order. A name containing one of the
    restaurant keywords sets the restaurant flag, a name containing
    'fast food' clears it, and an id found in ``specific_filter`` sets both
    flags. A later category can therefore override an earlier one.

    Args:
        categories: Iterable of ``(category_name, category_id)`` pairs.
        specific_filter: Optional collection of category ids of interest.

    Returns:
        Tuple ``(restaurant, specific)`` of booleans.
    """
    keywords = ('restaurant', 'diner', 'taverna', 'steakhouse')
    looks_like_restaurant = False
    matches_filter = False
    for entry in categories:
        label = entry[0].lower()
        entry_id = entry[1]
        if any(word in label for word in keywords):
            looks_like_restaurant = True
        if 'fast food' in label:
            looks_like_restaurant = False
        if specific_filter is not None and entry_id in specific_filter:
            matches_filter = True
            looks_like_restaurant = True
    return looks_like_restaurant, matches_filter

def get_categories(categories):
    """Reduce category dicts to a list of ``(name, id)`` tuples, in input order."""
    pairs = []
    for entry in categories:
        pairs.append((entry['name'], entry['id']))
    return pairs

def format_address(location):
    """Join ``location['formattedAddress']`` with ', ' and drop the country part."""
    joined = ', '.join(location['formattedAddress'])
    # The country shows up as either the German or the English name.
    for country_suffix in (', Deutschland', ', Germany'):
        joined = joined.replace(country_suffix, '')
    return joined

def get_venues_near_location(lat, lon, category, client_id, client_secret, radius=500, limit=100):
    """Query the Foursquare ``venues/explore`` endpoint around a point.

    Args:
        lat: Latitude of the search center.
        lon: Longitude of the search center.
        category: Foursquare category id to restrict the search to.
        client_id: Foursquare client id.
        client_secret: Foursquare client secret.
        radius: Search radius passed to the API.
        limit: Maximum number of venues requested.

    Returns:
        List of tuples ``(id, name, categories, (lat, lng), address, distance)``
        where ``categories`` comes from ``get_categories`` and ``address`` from
        ``format_address``. An empty list is returned when the request fails
        or the response lacks any of the expected fields.
    """
    version = '20180724'
    # Passing the values as params lets requests URL-encode them.
    params = {
        'client_id': client_id,
        'client_secret': client_secret,
        'v': version,
        'll': '{},{}'.format(lat, lon),
        'categoryId': category,
        'radius': radius,
        'limit': limit,
    }
    try:
        payload = requests.get('https://api.foursquare.com/v2/venues/explore',
                               params=params, timeout=10).json()
        results = payload['response']['groups'][0]['items']
        venues = [(item['venue']['id'],
                   item['venue']['name'],
                   get_categories(item['venue']['categories']),
                   (item['venue']['location']['lat'], item['venue']['location']['lng']),
                   format_address(item['venue']['location']),
                   item['venue']['location']['distance']) for item in results]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Best effort: a failed request or an unexpected payload yields no venues.
        venues = []
    return venues

In [18]:
# Let's now go over our neighborhood locations and get nearby restaurants; we'll also maintain a dictionary of all found restaurants and all found italian restaurants

import pickle

def get_restaurants(lats, lons):
    """Collect restaurants around every candidate location.

    Each restaurant is stored as the tuple
    ``(id, name, lat, lon, address, distance, is_italian, x, y)``.

    Args:
        lats: Latitudes of the candidate locations.
        lons: Longitudes of the candidate locations, parallel to ``lats``.

    Returns:
        Tuple ``(restaurants, italian_restaurants, location_restaurants)``:
        a dict of all restaurants keyed by venue id, a dict holding only the
        Italian ones, and a list with one entry per location listing the
        restaurants whose reported distance is at most 300.
    """
    all_restaurants = {}
    all_italian = {}
    per_location = []

    print('Obtaining venues around candidate locations:', end='')
    for lat, lon in zip(lats, lons):
        # Using radius=350 to make sure the search areas overlap and no restaurant is missed;
        # keying the dictionaries by venue id removes the duplicates the overlap produces.
        venues = get_venues_near_location(lat, lon, food_category, foursquare_client_id, foursquare_client_secret, radius=350, limit=100)
        nearby = []
        for venue in venues:
            venue_id, venue_name, venue_categories = venue[0], venue[1], venue[2]
            venue_latlon, venue_address, venue_distance = venue[3], venue[4], venue[5]
            is_res, is_italian = is_restaurant(venue_categories, specific_filter=italian_restaurant_categories)
            if not is_res:
                continue
            x, y = lonlat_to_xy(venue_latlon[1], venue_latlon[0])
            record = (venue_id, venue_name, venue_latlon[0], venue_latlon[1], venue_address, venue_distance, is_italian, x, y)
            # Only venues within 300 count towards this location's own list.
            if venue_distance <= 300:
                nearby.append(record)
            all_restaurants[venue_id] = record
            if is_italian:
                all_italian[venue_id] = record
        per_location.append(nearby)
        print(' .', end='')
    print(' done.')
    return all_restaurants, all_italian, per_location

# Try to load from local file system in case we did this before
restaurants = {}
italian_restaurants = {}
location_restaurants = []
loaded = False
try:
    # NOTE: pickle.load can execute arbitrary code; only load cache files that
    # this notebook wrote itself.
    with open('restaurants_350.pkl', 'rb') as f:
        restaurants = pickle.load(f)
    with open('italian_restaurants_350.pkl', 'rb') as f:
        italian_restaurants = pickle.load(f)
    with open('location_restaurants_350.pkl', 'rb') as f:
        location_restaurants = pickle.load(f)
    print('Restaurant data loaded.')
    loaded = True
except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError, IndexError) as load_error:
    # A missing or unreadable cache is expected on the first run. Report it and
    # discard anything that was only partially loaded before falling back.
    print('Restaurant cache not usable ({}); querying Foursquare instead.'.format(load_error))
    restaurants = {}
    italian_restaurants = {}
    location_restaurants = []

# If load failed use the Foursquare API to get the data
if not loaded:
    restaurants, italian_restaurants, location_restaurants = get_restaurants(latitudes, longitudes)
    
    # Let's persist this in local file system
    with open('restaurants_350.pkl', 'wb') as f:
        pickle.dump(restaurants, f)
    with open('italian_restaurants_350.pkl', 'wb') as f:
        pickle.dump(italian_restaurants, f)
    with open('location_restaurants_350.pkl', 'wb') as f:
        pickle.dump(location_restaurants, f)

Obtaining venues around candidate locations: . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [19]:
import numpy as np

# Summary statistics of the collected venues.
print('Total number of restaurants:', len(restaurants))
print('Total number of Italian restaurants:', len(italian_restaurants))
if restaurants:
    print('Percentage of Italian restaurants: {:.2f}%'.format(len(italian_restaurants) / len(restaurants) * 100))
else:
    # The venue lookup returns empty results on API failure, so an empty
    # dictionary is possible; avoid dividing by zero in that case.
    print('Percentage of Italian restaurants: n/a (no restaurants collected)')
print('Average number of restaurants in neighborhood:', np.array([len(r) for r in location_restaurants]).mean())

Total number of restaurants: 2122
Total number of Italian restaurants: 310
Percentage of Italian restaurants: 14.61%
Average number of restaurants in neighborhood: 5.1401098901098905


In [20]:
print('List of all restaurants')
print('-----------------------')
for r in list(restaurants.values())[:10]:
    print(r)
print('...')
print('Total:', len(restaurants))

List of all restaurants
-----------------------
('4c49b2c520ab1b8de2062117', 'Trattoria Toscana', 52.4715056699932, 13.385329206997389, 'Tempelhofer Damm 104 (Hoeppnerstr.), 12099 Berlin', 264, True, 390324.0734948359, 5814708.250705032)
('529c75dc498e5f28c08db637', 'Orient Food', 52.468418, 13.385631, 'Tempelhofer damm 124, 12099 Berlin', 280, False, 390336.8958508584, 5814364.380615842)
('4fc6a39be4b0b925af772392', 'Lodos Bistro', 52.47099391193633, 13.385207714672129, 'Berlin', 245, False, 390314.54923433566, 5814651.517235505)
('52b5d5d0498edc50653df469', 'Yasaka-Sushi', 52.46877834428962, 13.385458971242336, 'Tempelhofer Damm 124, 12099 Berlin', 263, False, 390326.1066713872, 5814404.719417979)
('4e68e3beae60186280aeda86', 'Mahatma', 52.47215311646854, 13.38533023091156, 'Tempelhofer Damm 102 (Manfred-von-Richthofen-Str.), 12101 Berlin', 308, False, 390325.75282294105, 5814780.258426609)
('4f53b76de4b0754d59050702', 'Yoko Sushi', 52.46798, 13.38516391, 'Tempelhofer Damm 128, 12099

In [21]:
print('List of Italian restaurants')
print('---------------------------')
for r in list(italian_restaurants.values())[:10]:
    print(r)
print('...')
print('Total:', len(italian_restaurants))

List of Italian restaurants
---------------------------
('4c49b2c520ab1b8de2062117', 'Trattoria Toscana', 52.4715056699932, 13.385329206997389, 'Tempelhofer Damm 104 (Hoeppnerstr.), 12099 Berlin', 264, True, 390324.0734948359, 5814708.250705032)
('4e7793c48130fa6bd3b581ff', 'Ninì e Pettirosso', 52.470317, 13.43698, 'Selkestr. 27, 12051 Berlin', 316, True, 393829.34645155125, 5814498.875631407)
('52c863ca498e73da17e59cb9', 'Caligari', 52.475843, 13.423658, 'Kienitzerstr. 110, 12049 Berlin', 307, True, 392937.9001831622, 5815133.146853393)
('514e2368e4b03aaaf1b5055c', 'La Pecora Nera', 52.477266, 13.421193, 'Herrfurthplatz 6, 12049 Berlin', 344, True, 392773.95274748007, 5815295.070954637)
('55a69d78498eeec7e1b94b3b', 'Bottega No. 6', 52.478164, 13.438749, 'Richardstr. 6, 12043 Berlin', 285, True, 393968.3663740409, 5815369.029899831)
('51dc5936498ed7f9f36a0f04', 'Su Nuraghe', 52.47474284089369, 13.446922302246094, 'Richardplatz 1, Berlin', 124, True, 394515.235132202, 5814976.556749946)

In [22]:
print('Restaurants around location')
print('---------------------------')
for i in range(100, 110):
    rs = location_restaurants[i][:8]
    names = ', '.join([r[1] for r in rs])
    print('Restaurants around location {}: {}'.format(i+1, names))

Restaurants around location
---------------------------
Restaurants around location 101: Mabuhay, Scandic Restaurant, Tischlein deck dich
Restaurants around location 102: Solar, Mirami - Sushi & Asia fusion cuisin, Layla, Ristorante Marinelli, Mexican, Cucina Italiana, Restaurant Hof zwei, Morélos Steakhaus & Cocktailbar
Restaurants around location 103: NaNum, Mama Cook, Paracas, Charlotte 1, Café Nullpunkt, Nobelhart & Schmutzig, Trattoria da Vinci, Steakhaus Asador
Restaurants around location 104: 
Restaurants around location 105: Pacifico, Shishi
Restaurants around location 106: Die Henne, Santa Maria, Chikogi, Zur kleinen Markthalle, Habibi, Cevichería, Parantez, Maroush
Restaurants around location 107: La Piadina, 3 Schwestern, Der Goldene Hahn, Trattoria Marechiaro, Weltrestaurant Markthalle, Long March Canteen, Olive
Restaurants around location 108: Salumeria Lamuri, Restaurant Richard
Restaurants around location 109: Vincent Vegan, Michelberger Restaurant, Tony Roma's, Scheers 

Let's now see all the collected restaurants in our area of interest on map, and let's also show Italian restaurants in different color

In [23]:
# Plot every collected restaurant: Italian ones in red, all others in blue.
map_berlin = folium.Map(location=berlin_center, zoom_start=13)
folium.Marker(berlin_center, popup='Alexanderplatz').add_to(map_berlin)
for res in restaurants.values():
    # Tuple layout: index 2 = latitude, 3 = longitude, 6 = Italian flag.
    lat, lon, is_italian = res[2], res[3], res[6]
    if is_italian:
        color = 'red'
    else:
        color = 'blue'
    folium.CircleMarker([lat, lon], radius=3, color=color, fill=True, fill_color=color, fill_opacity=1).add_to(map_berlin)
map_berlin

##  Analysis

Let's perform some basic exploratory data analysis and derive some additional info from our raw data. First let's count the number of restaurants in every area candidate:


In [24]:
# Number of restaurants recorded for each candidate area, in grid order.
location_restaurants_count = list(map(len, location_restaurants))

df_locations['Restaurants in area'] = location_restaurants_count

print('Average number of restaurants in every area with radius=300m:', np.mean(location_restaurants_count))

df_locations.head(10)

Average number of restaurants in every area with radius=300m: 5.1401098901098905


Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area
0,"Ringbahnstraße 50, 12099 Berlin",52.470194,13.388575,390541.280176,5814557.0,5992.495307,5
1,"09R/27L, 12101 Berlin",52.470314,13.397404,391141.280176,5814557.0,5840.3767,0
2,"09R/27L, 12049 Berlin",52.470434,13.406234,391741.280176,5814557.0,5747.173218,0
3,"Oderstraße 174, 12049 Berlin",52.470552,13.415063,392341.280176,5814557.0,5715.767665,0
4,"Warthestraße 23A, 12051 Berlin",52.47067,13.423893,392941.280176,5814557.0,5747.173218,1
5,"Altenbraker Str. 15, 12053 Berlin",52.470788,13.432722,393541.280176,5814557.0,5840.3767,7
6,"Karl-Marx-Straße 212, 12055 Berlin",52.470904,13.441552,394141.280176,5814557.0,5992.495307,6
7,"Hessenring 34, 12101 Berlin",52.474683,13.375159,389641.280176,5815077.0,5855.766389,0
8,"Kleineweg 125, 12101 Berlin",52.474804,13.383989,390241.280176,5815077.0,5604.462508,0
9,"09L/27R, 12101 Berlin",52.474924,13.39282,390841.280176,5815077.0,5408.326913,0


OK, now let's calculate the distance to nearest Italian restaurant from every area candidate center (not only those within 300m - we want distance to closest one, regardless of how distant it is).

In [25]:
# Distance from every candidate area center to its closest Italian restaurant.
# float('inf') marks "no Italian restaurant known"; the previous fixed 10000
# starting value silently capped every result at that number.
distances_to_italian_restaurant = [
    min(
        (calc_xy_distance(area_x, area_y, res[7], res[8]) for res in italian_restaurants.values()),
        default=float('inf'),
    )
    for area_x, area_y in zip(xs, ys)
]

df_locations['Distance to Italian restaurant'] = distances_to_italian_restaurant

In [26]:
df_locations.head(10)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area,Distance to Italian restaurant
0,"Ringbahnstraße 50, 12099 Berlin",52.470194,13.388575,390541.280176,5814557.0,5992.495307,5,264.408532
1,"09R/27L, 12101 Berlin",52.470314,13.397404,391141.280176,5814557.0,5840.3767,0,830.999331
2,"09R/27L, 12049 Berlin",52.470434,13.406234,391741.280176,5814557.0,5747.173218,0,1269.038823
3,"Oderstraße 174, 12049 Berlin",52.470552,13.415063,392341.280176,5814557.0,5715.767665,0,829.067436
4,"Warthestraße 23A, 12051 Berlin",52.47067,13.423893,392941.280176,5814557.0,5747.173218,1,575.681166
5,"Altenbraker Str. 15, 12053 Berlin",52.470788,13.432722,393541.280176,5814557.0,5840.3767,7,293.966217
6,"Karl-Marx-Straße 212, 12055 Berlin",52.470904,13.441552,394141.280176,5814557.0,5992.495307,6,317.390305
7,"Hessenring 34, 12101 Berlin",52.474683,13.375159,389641.280176,5815077.0,5855.766389,0,776.047531
8,"Kleineweg 125, 12101 Berlin",52.474804,13.383989,390241.280176,5815077.0,5604.462508,0,378.018237
9,"09L/27R, 12101 Berlin",52.474924,13.39282,390841.280176,5815077.0,5408.326913,0,635.252552


In [27]:
print('Average distance to closest Italian restaurant from each area center:', df_locations['Distance to Italian restaurant'].mean())

Average distance to closest Italian restaurant from each area center: 491.66885234457226


In [28]:
berlin_boroughs_url = 'https://raw.githubusercontent.com/m-hoerz/berlin-shapes/master/berliner-bezirke.geojson'
boroughs_response = requests.get(berlin_boroughs_url, timeout=30)
# Fail with a clear HTTP error instead of trying to parse an error page as GeoJSON.
boroughs_response.raise_for_status()
berlin_boroughs = boroughs_response.json()

def boroughs_style(feature):
    """Style callback for the borough overlay: blue outline, no fill.

    ``feature`` is ignored, so every feature gets the same style.
    """
    return dict(color='blue', fill=False)

In [29]:
restaurant_latlons = [[res[2], res[3]] for res in restaurants.values()]

italian_latlons = [[res[2], res[3]] for res in italian_restaurants.values()]

In [30]:


from folium import plugins
from folium.plugins import HeatMap

# Heat map of all restaurants, with rings around the center and borough outlines.
map_berlin = folium.Map(location=berlin_center, zoom_start=13)
folium.TileLayer('cartodbpositron').add_to(map_berlin) #cartodbpositron cartodbdark_matter
HeatMap(restaurant_latlons).add_to(map_berlin)
folium.Marker(berlin_center).add_to(map_berlin)
for ring_radius in (1000, 2000, 3000):
    folium.Circle(berlin_center, radius=ring_radius, fill=False, color='white').add_to(map_berlin)
folium.GeoJson(berlin_boroughs, style_function=boroughs_style, name='geojson').add_to(map_berlin)
map_berlin



In [31]:
# Heat map of Italian restaurants only, with the same rings and borough outlines.
map_berlin = folium.Map(location=berlin_center, zoom_start=13)
folium.TileLayer('cartodbpositron').add_to(map_berlin) #cartodbpositron cartodbdark_matter
HeatMap(italian_latlons).add_to(map_berlin)
folium.Marker(berlin_center).add_to(map_berlin)
for ring_radius in (1000, 2000, 3000):
    folium.Circle(berlin_center, radius=ring_radius, fill=False, color='white').add_to(map_berlin)
folium.GeoJson(berlin_boroughs, style_function=boroughs_style, name='geojson').add_to(map_berlin)
map_berlin

Let's define new, more narrow region of interest, which will include low-restaurant-count parts of Kreuzberg and Friedrichshain closest to Alexanderplatz

In [32]:


# Region of interest (ROI), defined in the projected X/Y coordinates relative
# to the city center.
roi_x_min = berlin_center_x - 2000
roi_y_max = berlin_center_y + 1000
# Width and height are not referenced again in this cell; the 2500 offsets
# below are half of these values.
roi_width = 5000
roi_height = 5000
roi_center_x = roi_x_min + 2500
roi_center_y = roi_y_max - 2500
# xy_to_lonlat returns (lon, lat); folium locations are given as [lat, lon].
roi_center_lon, roi_center_lat = xy_to_lonlat(roi_center_x, roi_center_y)
roi_center = [roi_center_lat, roi_center_lon]

# Show the ROI as a translucent circle of radius 2500 over the restaurant heat map.
map_berlin = folium.Map(location=roi_center, zoom_start=14)
HeatMap(restaurant_latlons).add_to(map_berlin)
folium.Marker(berlin_center).add_to(map_berlin)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.4).add_to(map_berlin)
folium.GeoJson(berlin_boroughs, style_function=boroughs_style, name='geojson').add_to(map_berlin)
map_berlin





Not bad - this nicely covers all the pockets of low restaurant density in Kreuzberg and Friedrichshain closest to Berlin center.

Let's also create a new, denser grid of location candidates restricted to our new region of interest (let's make our location candidates 100m apart).


In [33]:
# Build a denser hexagonal grid (step 100) of candidate points and keep the
# ones that lie within about 2500 of the ROI center.
k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
x_step = 100
y_step = 100 * k 
roi_y_min = roi_center_y - 2500

# Parallel lists: entry i of each list describes the same candidate point.
roi_latitudes = []
roi_longitudes = []
roi_xs = []
roi_ys = []
# int(51/k) rows spaced 100*k apart and 51 columns spaced 100 apart.
for i in range(0, int(51/k)):
    y = roi_y_min + i * y_step
    # Even rows are shifted by half a column step to get the hexagonal layout.
    x_offset = 50 if i%2==0 else 0
    for j in range(0, 51):
        x = roi_x_min + j * x_step + x_offset
        d = calc_xy_distance(roi_center_x, roi_center_y, x, y)
        # 2501 rather than 2500: presumably a small tolerance for float rounding.
        if (d <= 2501):
            lon, lat = xy_to_lonlat(x, y)
            roi_latitudes.append(lat)
            roi_longitudes.append(lon)
            roi_xs.append(x)
            roi_ys.append(y)

print(len(roi_latitudes), 'candidate neighborhood centers generated.')

2261 candidate neighborhood centers generated.


OK. Now let's calculate two most important things for each location candidate: number of restaurants in vicinity (we'll use radius of 250 meters) and distance to closest Italian restaurant.

In [34]:
def count_restaurants_nearby(x, y, restaurants, radius=250):
    """Count the restaurants located at most ``radius`` away from (x, y).

    Args:
        x: X coordinate of the query point.
        y: Y coordinate of the query point.
        restaurants: Dict of restaurant tuples; indices 7 and 8 of each tuple
            are read as its X and Y coordinates.
        radius: Inclusive distance limit.

    Returns:
        Number of restaurants within ``radius`` of the point.
    """
    return sum(
        1
        for res in restaurants.values()
        if calc_xy_distance(x, y, res[7], res[8]) <= radius
    )

def find_nearest_restaurant(x, y, restaurants):
    """Return the distance from (x, y) to the closest restaurant.

    Args:
        x: X coordinate of the query point.
        y: Y coordinate of the query point.
        restaurants: Dict of restaurant tuples; indices 7 and 8 of each tuple
            are read as its X and Y coordinates.

    Returns:
        The smallest distance found, or ``float('inf')`` when ``restaurants``
        is empty. The result is no longer capped at a fixed 100000 sentinel.
    """
    return min(
        (calc_xy_distance(x, y, res[7], res[8]) for res in restaurants.values()),
        default=float('inf'),
    )

# For every ROI candidate: restaurants within 250 and distance to the nearest
# Italian restaurant. Both lists are parallel to roi_xs / roi_ys.
print('Generating data on location candidates... ', end='')
roi_restaurant_counts = [
    count_restaurants_nearby(x, y, restaurants, radius=250)
    for x, y in zip(roi_xs, roi_ys)
]
roi_italian_distances = [
    find_nearest_restaurant(x, y, italian_restaurants)
    for x, y in zip(roi_xs, roi_ys)
]
print('done.')

Generating data on location candidates... done.


In [35]:
# Let's put this into dataframe
# One row per ROI candidate; the lists are parallel, so row i combines entry i
# of each of them.
roi_columns = {
    'Latitude': roi_latitudes,
    'Longitude': roi_longitudes,
    'X': roi_xs,
    'Y': roi_ys,
    'Restaurants nearby': roi_restaurant_counts,
    'Distance to Italian restaurant': roi_italian_distances,
}
df_roi_locations = pd.DataFrame(roi_columns)

df_roi_locations.head(10)

Unnamed: 0,Latitude,Longitude,X,Y,Restaurants nearby,Distance to Italian restaurant
0,52.48606,13.421133,392791.280176,5816273.0,6,158.565914
1,52.48608,13.422605,392891.280176,5816273.0,7,180.522171
2,52.48673,13.413009,392241.280176,5816360.0,0,523.935806
3,52.48675,13.414481,392341.280176,5816360.0,0,468.257436
4,52.486769,13.415953,392441.280176,5816360.0,0,369.743331
5,52.486789,13.417425,392541.280176,5816360.0,2,272.314591
6,52.486809,13.418897,392641.280176,5816360.0,4,177.764848
7,52.486829,13.420369,392741.280176,5816360.0,6,95.107551
8,52.486848,13.421841,392841.280176,5816360.0,6,80.563958
9,52.486868,13.423314,392941.280176,5816360.0,9,154.711526


OK. Let us now filter those locations: we're interested only in locations with no more than two restaurants in radius of 250 meters, and no Italian restaurants in radius of 400 meters.

In [36]:
# Boolean masks over the ROI candidates, one entry per row of df_roi_locations.
good_res_count = (df_roi_locations['Restaurants nearby'] <= 2).values
print('Locations with no more than two restaurants nearby:', good_res_count.sum())

good_ita_distance = (df_roi_locations['Distance to Italian restaurant'] >= 400).values
print('Locations with no Italian restaurants within 400m:', good_ita_distance.sum())

# A candidate is kept only when both conditions hold.
good_locations = good_res_count & good_ita_distance
print('Locations with both conditions met:', good_locations.sum())

df_good_locations = df_roi_locations[good_locations]

Locations with no more than two restaurants nearby: 773
Locations with no Italian restaurants within 400m: 362
Locations with both conditions met: 292


In [37]:
good_latitudes = df_good_locations['Latitude'].values
good_longitudes = df_good_locations['Longitude'].values

# [lat, lon] pairs of the locations that passed both filters.
good_locations = list(map(list, zip(good_latitudes, good_longitudes)))

# Restaurant heat map with the ROI circle and one blue dot per good location.
map_berlin = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_berlin)
HeatMap(restaurant_latlons).add_to(map_berlin)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.6).add_to(map_berlin)
folium.Marker(berlin_center).add_to(map_berlin)
for lat, lon in good_locations:
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_berlin) 
folium.GeoJson(berlin_boroughs, style_function=boroughs_style, name='geojson').add_to(map_berlin)
map_berlin

In [38]:
from sklearn.cluster import KMeans

number_of_clusters = 15

# Cluster in projected X/Y space rather than on raw latitude/longitude values.
good_xys = df_good_locations[['X', 'Y']].values
kmeans = KMeans(n_clusters=number_of_clusters, random_state=0)
kmeans.fit(good_xys)

# Convert every centroid back from X/Y; xy_to_lonlat yields (lon, lat) pairs.
cluster_centers = [xy_to_lonlat(cx, cy) for cx, cy in kmeans.cluster_centers_]

# Base map: light tiles, restaurant heatmap and a dimmed region-of-interest disc.
map_berlin = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_berlin)
HeatMap(restaurant_latlons).add_to(map_berlin)
folium.Circle(
    location=roi_center,
    radius=2500,
    color='white',
    fill=True,
    fill_opacity=0.4,
).add_to(map_berlin)
folium.Marker(location=berlin_center).add_to(map_berlin)

# Translucent green disc around each cluster center (note the lon/lat order).
for lon, lat in cluster_centers:
    folium.Circle(
        location=[lat, lon],
        radius=500,
        color='green',
        fill=True,
        fill_opacity=0.25,
    ).add_to(map_berlin)

# Individual candidate locations drawn as small blue dots.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=1,
    ).add_to(map_berlin)

folium.GeoJson(
    berlin_boroughs,
    style_function=boroughs_style,
    name='geojson',
).add_to(map_berlin)
map_berlin

In [39]:
# Overview map of the candidate zones, without the restaurant heatmap.
map_berlin = folium.Map(location=roi_center, zoom_start=14)
folium.Marker(location=berlin_center).add_to(map_berlin)

# Faint, border-less disc around every good location; overlaps shade darker.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle(
        location=[lat, lon],
        radius=250,
        color='#00000000',
        fill=True,
        fill_color='#0066ff',
        fill_opacity=0.07,
    ).add_to(map_berlin)

# The locations themselves, drawn in a second pass so the dots sit above the discs.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=1,
    ).add_to(map_berlin)

# Unfilled green ring around each cluster center (stored as lon, lat).
for lon, lat in cluster_centers:
    folium.Circle(
        location=[lat, lon],
        radius=500,
        color='green',
        fill=False,
    ).add_to(map_berlin)

folium.GeoJson(
    berlin_boroughs,
    style_function=boroughs_style,
    name='geojson',
).add_to(map_berlin)
map_berlin

Let's zoom in on candidate areas in Kreuzberg:

In [40]:
# Closer view (zoom 15) centered on a fixed coordinate instead of roi_center.
map_berlin = folium.Map(location=[52.498972, 13.409591], zoom_start=15)
folium.Marker(location=berlin_center).add_to(map_berlin)

# Unfilled green ring around each cluster center (stored as lon, lat).
for lon, lat in cluster_centers:
    folium.Circle(
        location=[lat, lon],
        radius=500,
        color='green',
        fill=False,
    ).add_to(map_berlin)

# Faint, border-less disc around every good location.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle(
        location=[lat, lon],
        radius=250,
        color='#0000ff00',
        fill=True,
        fill_color='#0066ff',
        fill_opacity=0.07,
    ).add_to(map_berlin)

# The locations themselves, drawn in a separate pass after all the discs.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=1,
    ).add_to(map_berlin)

folium.GeoJson(
    berlin_boroughs,
    style_function=boroughs_style,
    name='geojson',
).add_to(map_berlin)
map_berlin

...and candidate areas in Friedrichshain:

In [41]:


# Closer view (zoom 15) centered on a second fixed coordinate.
map_berlin = folium.Map(location=[52.516347, 13.428403], zoom_start=15)
folium.Marker(location=berlin_center).add_to(map_berlin)

# Unfilled green ring around each cluster center (stored as lon, lat).
for lon, lat in cluster_centers:
    folium.Circle(
        location=[lat, lon],
        radius=500,
        color='green',
        fill=False,
    ).add_to(map_berlin)

# Faint, border-less disc around every good location.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle(
        location=[lat, lon],
        radius=250,
        color='#0000ff00',
        fill=True,
        fill_color='#0066ff',
        fill_opacity=0.07,
    ).add_to(map_berlin)

# The locations themselves, drawn in a separate pass after all the discs.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker(
        location=[lat, lon],
        radius=2,
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=1,
    ).add_to(map_berlin)

folium.GeoJson(
    berlin_boroughs,
    style_function=boroughs_style,
    name='geojson',
).add_to(map_berlin)
map_berlin



In [42]:
candidate_area_addresses = []

# Three-line banner; the trailing '\n' on the last rule leaves a blank line after it.
print(
    '==============================================================',
    'Addresses of centers of areas recommended for further analysis',
    '==============================================================\n',
    sep='\n',
)

# cluster_centers holds (lon, lat) pairs; get_address takes latitude first.
for lon, lat in cluster_centers:
    addr = get_address(google_api_key, lat, lon).replace(', Germany', '')
    candidate_area_addresses.append(addr)

    # Distance to the reference point is measured in projected X/Y coordinates.
    x, y = lonlat_to_xy(lon, lat)
    d = calc_xy_distance(x, y, berlin_center_x, berlin_center_y)

    # Pad the address out to 50 characters so the distance column lines up.
    padding = ' ' * (50 - len(addr))
    distance_km = d / 1000
    print('{}{} => {:.1f}km from Alexanderplatz'.format(addr, padding, distance_km))

Addresses of centers of areas recommended for further analysis

Vor dem Schlesischen Tor 2, 10997 Berlin           => 3.7km from Alexanderplatz
Stallschreiberstraße 46, 10969 Berlin              => 1.7km from Alexanderplatz
Platz der Vereinten Nationen 25, 10249 Berlin      => 1.0km from Alexanderplatz
Köpenicker Str. 40, 10179 Berlin                   => 1.6km from Alexanderplatz
Dog Park, Hasenheide 82, 10967 Berlin              => 3.9km from Alexanderplatz
Prinzenstraße 1, 10969 Berlin                      => 2.8km from Alexanderplatz
Breite Str. 1, 10178 Berlin                        => 1.0km from Alexanderplatz
Landsberger Allee 37, 10249 Berlin                 => 1.9km from Alexanderplatz
Holzmarktstraße 55, 10179 Berlin                   => 1.0km from Alexanderplatz
Berolinastraße 14, 10178 Berlin                    => 0.5km from Alexanderplatz
Neuenburger Str. 16, 10969 Berlin                  => 2.7km from Alexanderplatz
Michaelkirchpl. 23, 10179 Berlin                   => 1.

In [43]:
# Final map: reference point, one address marker per cluster, and the good-location discs.
map_berlin = folium.Map(location=roi_center, zoom_start=14)
folium.Circle(
    location=berlin_center,
    radius=50,
    color='red',
    fill=True,
    fill_color='red',
    fill_opacity=1,
).add_to(map_berlin)

# Cluster centers are stored as (lon, lat); flip them to [lat, lon] for the marker.
for lonlat, addr in zip(cluster_centers, candidate_area_addresses):
    folium.Marker(location=list(lonlat[::-1]), popup=addr).add_to(map_berlin)

# Faint, border-less disc around every good location.
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle(
        location=[lat, lon],
        radius=250,
        color='#0000ff00',
        fill=True,
        fill_color='#0066ff',
        fill_opacity=0.05,
    ).add_to(map_berlin)
map_berlin