Azure key hidden

In [86]:
import requests, json
import pandas as pd
pd.set_option('display.max_colwidth', -1)

query = r'London, UK'

def get_coordinates(query, key):
    response = requests.get('https://atlas.microsoft.com/search/fuzzy/json?api-version=1.0&subscription-key=' + key + '&query=' + query)
    jsondata = response.json()['results']
    df = pd.DataFrame(jsondata)
    df = df[df['type']=='Geography']['position']
    lat = df[0]['lat']
    lon = df[0]['lon']
    return [lat, lon]

center = get_coordinates(query, key)

In [85]:
#!pip install shapely
import shapely.geometry

#!pip install pyproj
import pyproj

import math

def lonlat_to_xy(lon, lat):
    proj_latlon = pyproj.Proj(proj='latlong',datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    xy = pyproj.transform(proj_latlon, proj_xy, lon, lat)
    return xy[0], xy[1]

def xy_to_lonlat(x, y):
    proj_latlon = pyproj.Proj(proj='latlong',datum='WGS84')
    proj_xy = pyproj.Proj(proj="utm", zone=33, datum='WGS84')
    lonlat = pyproj.transform(proj_xy, proj_latlon, x, y)
    return lonlat[0], lonlat[1]

def calc_xy_distance(x1, y1, x2, y2):
    dx = x2 - x1
    dy = y2 - y1
    return math.sqrt(dx*dx + dy*dy)

print('Coordinate transformation check')
print('-------------------------------')
print('London center longitude={}, latitude={}'.format(center[1], center[0]))
x, y = lonlat_to_xy(center[1], center[0])
print('London center UTM X={}, Y={}'.format(x, y))
lo, la = xy_to_lonlat(x, y)
print('London center longitude={}, latitude={}'.format(lo, la))

Coordinate transformation check
-------------------------------
London center longitude=-0.12624, latitude=51.50015
London center UTM X=-547082.9460837919, Y=5814745.518286032
London center longitude=-0.12623999999999394, latitude=51.50014999999999


In [87]:
center_x, center_y = lonlat_to_xy(center[1], center[0]) # City center in Cartesian coordinates

k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
x_min = center_x - 6000
x_step = 600
y_min = center_y - 6000 - (int(21/k)*k*600 - 12000)/2
y_step = 600 * k 

latitudes = []
longitudes = []
distances_from_center = []
xs = []
ys = []
for i in range(0, int(21/k)):
    y = y_min + i * y_step
    x_offset = 300 if i%2==0 else 0
    for j in range(0, 21):
        x = x_min + j * x_step + x_offset
        distance_from_center = calc_xy_distance(center_x, center_y, x, y)
        if (distance_from_center <= 6001):
            lon, lat = xy_to_lonlat(x, y)
            latitudes.append(lat)
            longitudes.append(lon)
            distances_from_center.append(distance_from_center)
            xs.append(x)
            ys.append(y)

print(len(latitudes), 'candidate neighborhood centers generated.')

364 candidate neighborhood centers generated.


In [None]:
#!pip install folium

import folium

map_show = folium.Map(location=center, zoom_start=13)
folium.Marker(center, popup='Lambeth').add_to(map_show)
for lat, lon in zip(latitudes, longitudes):
    #folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_berlin) 
    folium.Circle([lat, lon], radius=300, color='blue', fill=False).add_to(map_show)
    #folium.Marker([lat, lon]).add_to(map_show)

In [89]:
map_show

In [90]:
def get_address(key, latitude, longitude):
    try:
        response = requests.get('https://atlas.microsoft.com/search/address/reverse/json?api-version=1.0&subscription-key=' + key + 
                                '&query=' + str(latitude) + ',' + str(longitude))
        results = response.json()['addresses'][0]
        address = results['address']['freeformAddress']
        return address
    except:
        return None

addr = get_address(key, center[0], center[1])
print('Reverse geocoding check')
print('-----------------------')
print('Address of [{}, {}] is: {}'.format(center[0], center[1], addr))

Reverse geocoding check
-----------------------
Address of [51.50015, -0.12624] is: Saint Margaret Street, London, SW1P 3


In [91]:
print('Obtaining location addresses: ', end='')
addresses = []
for lat, lon in zip(latitudes, longitudes):
    address = get_address(key, lat, lon)
    if address is None:
        address = 'NO ADDRESS'
    #address = address.replace(', Germany', '') # We don't need country part of address
    addresses.append(address)
    print(' .', end='')
print(' done.')

Obtaining location addresses:  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [92]:
addresses[150:170]

['71 West Lane, London, SE16 4LY',
 '31 Brunel Road, London, SE16 4LA',
 '(18 - 10) Radley Court, London, SE16 6AJ',
 '51 Lillie Road, London, SW6 1UF',
 '124C Finborough Road, London, SW10 9AW',
 '29 Gilston Road, London, SW10 9SS',
 '(2 - 4) South Parade, London, SW3 6LP',
 '35 Jubilee Place, London, SW3 3TB',
 '76 Duke of York Square, London, SW3 4LY',
 'London, SW1W 9',
 '54 Wilton Road, London, SW1V 1DT',
 '28 Maunsel Street, London, SW1P 2QN',
 '9 Millbank, London, SW1P 3JA',
 '45 Carlisle Lane, London, SE1 7LE',
 "Saint George's Road, London, SE1 6HX",
 '86 Borough Road, London, SE1 0AA',
 'Nebraska Street, London, SE1 4LT',
 'London, SE1 3',
 '26 Curlew Street, London, SE1 2ND',
 'River Bus Commuter Service (RB1), London']

In [108]:
df_locations = pd.DataFrame({'Address': addresses,
                             'Latitude': latitudes,
                             'Longitude': longitudes,
                             'X': xs,
                             'Y': ys,
                             'Distance from center': distances_from_center})

df_locations.head(10)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center
0,"10 Scrutton Close, London, SW12 0AW",51.447233,-0.134452,-548882.946084,5809030.0,5992.495307
1,"London, SW2 5",51.448335,-0.126118,-548282.946084,5809030.0,5840.3767
2,"68 Elm Park, London, SW2 2TZ",51.449435,-0.117783,-547682.946084,5809030.0,5747.173218
3,"23 Brockwell Park Row, London, SW2 2YJ",51.450536,-0.109447,-547082.946084,5809030.0,5715.767665
4,"39 Norwood Road, London, SE24 9QG",51.451635,-0.101111,-546482.946084,5809030.0,5747.173218
5,"90 Delawyk Crescent, London, SE24 9JD",51.452734,-0.092774,-545882.946084,5809030.0,5840.3767
6,"5 Great Spilmans, London, SE22 8SZ",51.453833,-0.084437,-545282.946084,5809030.0,5992.495307
7,"8 Clarence Mews, London, SW12 9SR",51.45009,-0.148481,-549782.946084,5809549.0,5855.766389
8,"Oaklands Estate, London, SW4 8AH",51.451193,-0.140146,-549182.946084,5809549.0,5604.462508
9,"147 Clarence Crescent, London, SW4 8LN",51.452294,-0.131811,-548582.946084,5809549.0,5408.326913


In [94]:
df_locations.to_pickle('./locations.pkl')   

Foursquare client id & secret hidden

In [110]:
food_category = '4d4b7105d754a06374d81259' # 'Root' category for all food-related venues

italian_restaurant_categories = ['4bf58dd8d48988d110941735','55a5a1ebe4b013909087cbb6','55a5a1ebe4b013909087cb7c',
                                 '55a5a1ebe4b013909087cba7','55a5a1ebe4b013909087cba1','55a5a1ebe4b013909087cba4',
                                 '55a5a1ebe4b013909087cb95','55a5a1ebe4b013909087cb89','55a5a1ebe4b013909087cb9b',
                                 '55a5a1ebe4b013909087cb98','55a5a1ebe4b013909087cbbf','55a5a1ebe4b013909087cb79',
                                 '55a5a1ebe4b013909087cbb0','55a5a1ebe4b013909087cbb3','55a5a1ebe4b013909087cb74',
                                 '55a5a1ebe4b013909087cbaa','55a5a1ebe4b013909087cb83','55a5a1ebe4b013909087cb8c',
                                 '55a5a1ebe4b013909087cb92','55a5a1ebe4b013909087cb8f','55a5a1ebe4b013909087cb86',
                                 '55a5a1ebe4b013909087cbb9','55a5a1ebe4b013909087cb7f','55a5a1ebe4b013909087cbbc',
                                 '55a5a1ebe4b013909087cb9e','55a5a1ebe4b013909087cbc2','55a5a1ebe4b013909087cbad']

def is_restaurant(categories, specific_filter=None):
    restaurant_words = ['restaurant', 'diner', 'taverna', 'steakhouse']
    restaurant = False
    specific = False
    for c in categories:
        category_name = c[0].lower()
        category_id = c[1]
        for r in restaurant_words:
            if r in category_name:
                restaurant = True
        if 'fast food' in category_name:
            restaurant = False
        if not(specific_filter is None) and (category_id in specific_filter):
            specific = True
            restaurant = True
    return restaurant, specific

def get_categories(categories):
    return [(cat['name'], cat['id']) for cat in categories]

def format_address(location):
    address = ', '.join(location['formattedAddress'])
    #address = address.replace(', Deutschland', '')
    #address = address.replace(', Germany', '')
    return address

def get_venues_near_location(lat, lon, category, client_id, client_secret, radius=500, limit=100):
    version = '20180724'
    url = 'https://api.foursquare.com/v2/venues/explore?client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&radius={}&limit={}'.format(
        client_id, client_secret, version, lat, lon, category, radius, limit)
    try:
        results = requests.get(url).json()['response']['groups'][0]['items']
        venues = [(item['venue']['id'],
                   item['venue']['name'],
                   get_categories(item['venue']['categories']),
                   (item['venue']['location']['lat'], item['venue']['location']['lng']),
                   format_address(item['venue']['location']),
                   item['venue']['location']['distance']) for item in results]        
    except:
        venues = []
    return venues

In [111]:
# Let's now go over our neighborhood locations and get nearby restaurants; we'll also maintain a dictionary of all found restaurants and all found italian restaurants

import pickle

def get_restaurants(lats, lons):
    restaurants = {}
    italian_restaurants = {}
    location_restaurants = []

    print('Obtaining venues around candidate locations:', end='')
    for lat, lon in zip(lats, lons):
        # Using radius=350 to meke sure we have overlaps/full coverage so we don't miss any restaurant (we're using dictionaries to remove any duplicates resulting from area overlaps)
        venues = get_venues_near_location(lat, lon, food_category, client_id, client_secret, radius=350, limit=100)
        area_restaurants = []
        for venue in venues:
            venue_id = venue[0]
            venue_name = venue[1]
            venue_categories = venue[2]
            venue_latlon = venue[3]
            venue_address = venue[4]
            venue_distance = venue[5]
            is_res, is_italian = is_restaurant(venue_categories, specific_filter=italian_restaurant_categories)
            if is_res:
                x, y = lonlat_to_xy(venue_latlon[1], venue_latlon[0])
                restaurant = (venue_id, venue_name, venue_latlon[0], venue_latlon[1], venue_address, venue_distance, is_italian, x, y)
                if venue_distance<=300:
                    area_restaurants.append(restaurant)
                restaurants[venue_id] = restaurant
                if is_italian:
                    italian_restaurants[venue_id] = restaurant
        location_restaurants.append(area_restaurants)
        print(' .', end='')
    print(' done.')
    return restaurants, italian_restaurants, location_restaurants

# Try to load from local file system in case we did this before
restaurants = {}
italian_restaurants = {}
location_restaurants = []
loaded = False
try:
    with open('restaurants_350.pkl', 'rb') as f:
        restaurants = pickle.load(f)
    with open('italian_restaurants_350.pkl', 'rb') as f:
        italian_restaurants = pickle.load(f)
    with open('location_restaurants_350.pkl', 'rb') as f:
        location_restaurants = pickle.load(f)
    print('Restaurant data loaded.')
    loaded = True
except:
    pass

# If load failed use the Foursquare API to get the data
if not loaded:
    restaurants, italian_restaurants, location_restaurants = get_restaurants(latitudes, longitudes)
    
    # Let's persists this in local file system
    with open('restaurants_350.pkl', 'wb') as f:
        pickle.dump(restaurants, f)
    with open('italian_restaurants_350.pkl', 'wb') as f:
        pickle.dump(italian_restaurants, f)
    with open('location_restaurants_350.pkl', 'wb') as f:
        pickle.dump(location_restaurants, f)
        

Obtaining venues around candidate locations: . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [112]:
import numpy as np

print('Total number of restaurants:', len(restaurants))
print('Total number of Italian restaurants:', len(italian_restaurants))
print('Percentage of Italian restaurants: {:.2f}%'.format(len(italian_restaurants) / len(restaurants) * 100))
print('Average number of restaurants in neighborhood:', np.array([len(r) for r in location_restaurants]).mean())

Total number of restaurants: 2814
Total number of Italian restaurants: 412
Percentage of Italian restaurants: 14.64%
Average number of restaurants in neighborhood: 6.851648351648351


In [113]:
print('List of all restaurants')
print('-----------------------')
for r in list(restaurants.values())[:5]:
    print(r)
print('...')
print('Total:', len(restaurants))

List of all restaurants
-----------------------
('4c2e38bf7d85a593eef553f3', 'Luanda Grill', 51.44742303467162, -0.12437808709929998, '256a Brixton Hill, Brixton, Greater London, SW2 1HF, United Kingdom', 157, False, -548184.3357480523, 5808903.871061945)
('4b75e723f964a520682d2ee3', 'Bombay Inn', 51.44909635767457, -0.12398269054034235, '252 Brixton Hill, Brixton Hill, Greater London, SW2 1HF, United Kingdom', 170, False, -548118.0828736055, 5809082.644562421)
('4dcd81fcd22deadedd3acafc', 'Wing Fu', 51.445970117008564, -0.12436866760253906, '282 Brixton Hill (at Morrish Rd), Brixton, Greater London, SW2 1HT, United Kingdom', 289, False, -548217.5553274208, 5808743.506958647)
('4c8769706e65199c44f75c53', 'Ten Thanks', 51.4486955597945, -0.12050062020187428, '31 Upper Tulse Hill (Ostade Rd & Wimbart Rd), Brixton, Greater London, SW2 2SD, United Kingdom', 205, False, -547887.484229397, 5808987.73223665)
('4e8a3a52cc215b0367fe9b76', 'The Best Kebab', 51.45267269321684, -0.1021205904539633

In [114]:
print('List of Italian restaurants')
print('---------------------------')
for r in list(italian_restaurants.values())[:5]:
    print(r)
print('...')
print('Total:', len(italian_restaurants))

List of Italian restaurants
---------------------------
('56cec0facd101996cd7e447c', 'le poisson de mer deli', 51.45093682616033, -0.08584753442811514, 'United Kingdom', 336, True, -545447.4529130226, 5808730.878428245)
('4cd1c5c9f6378cfa8c87bdd6', 'Ciullosteria', 51.447144, -0.148981, '31 Balham High Rd., London, Greater London, SW12 9AL, United Kingdom', 329, True, -549886.2023925132, 5809231.772000254)
('4c13c86f77cea593bb6acf60', 'Il Mirto', 51.460941644493346, -0.07981452415344481, 'Melbourne Grove, East Dulwich, Greater London, SE2 2 8, United Kingdom', 338, True, -544799.1930421473, 5809746.625912474)
('500efe2be4b05190311f2e13', 'La Cucina', 51.453982, -0.163909, '175 Northcote Road (Honeywell Road), Battersea, Greater London, SW11 9QT, United Kingdom', 150, True, -550755.0148963695, 5810203.743583629)
('4d2cc7ebfd2e236a87b4eade', 'Gusto Fino', 51.45377654355899, -0.15632238769771659, 'United Kingdom', 151, True, -550237.1264437824, 5810070.316392073)
...
Total: 412


In [115]:
print('Restaurants around location')
print('---------------------------')
for i in range(100, 110):
    rs = location_restaurants[i][:8]
    names = ', '.join([r[1] for r in rs])
    print('Restaurants around location {}: {}'.format(i+1, names))

Restaurants around location
---------------------------
Restaurants around location 101: 
Restaurants around location 102: Boqueria, San Gennaro, Pho Ta, New City, Good Food House
Restaurants around location 103: Mihara, Mogul lite lunch
Restaurants around location 104: Linnaean, Darby’s
Restaurants around location 105: Hot Stuff, Harare Restaurant, The Three Lions, Mumbai Delight, Bar Estrela, Cafe Portugal, Tony's Delicatessen
Restaurants around location 106: 24 The Oval
Restaurants around location 107: Taro
Restaurants around location 108: 
Restaurants around location 109: La Luna, Bayroot, CheeMc
Restaurants around location 110: Rajah Tandoori and Curry


In [120]:
map_show = folium.Map(location=center, zoom_start=13)
folium.Marker(center, popup='Roman Bath').add_to(map_show)
for res in restaurants.values():
    lat = res[2]; lon = res[3]
    is_italian = res[6]
    color = 'red' if is_italian else 'blue'
    folium.CircleMarker([lat, lon], radius=3, color=color, fill=True, fill_color=color, fill_opacity=1).add_to(map_show)

In [121]:
map_show

In [122]:
location_restaurants_count = [len(res) for res in location_restaurants]

df_locations['Restaurants in area'] = location_restaurants_count

print('Average number of restaurants in every area with radius=300m:', np.array(location_restaurants_count).mean())

df_locations.head(20)

Average number of restaurants in every area with radius=300m: 6.851648351648351


Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area
0,"10 Scrutton Close, London, SW12 0AW",51.447233,-0.134452,-548882.946084,5809030.0,5992.495307,0
1,"London, SW2 5",51.448335,-0.126118,-548282.946084,5809030.0,5840.3767,3
2,"68 Elm Park, London, SW2 2TZ",51.449435,-0.117783,-547682.946084,5809030.0,5747.173218,1
3,"23 Brockwell Park Row, London, SW2 2YJ",51.450536,-0.109447,-547082.946084,5809030.0,5715.767665,0
4,"39 Norwood Road, London, SE24 9QG",51.451635,-0.101111,-546482.946084,5809030.0,5747.173218,3
5,"90 Delawyk Crescent, London, SE24 9JD",51.452734,-0.092774,-545882.946084,5809030.0,5840.3767,1
6,"5 Great Spilmans, London, SE22 8SZ",51.453833,-0.084437,-545282.946084,5809030.0,5992.495307,0
7,"8 Clarence Mews, London, SW12 9SR",51.45009,-0.148481,-549782.946084,5809549.0,5855.766389,3
8,"Oaklands Estate, London, SW4 8AH",51.451193,-0.140146,-549182.946084,5809549.0,5604.462508,0
9,"147 Clarence Crescent, London, SW4 8LN",51.452294,-0.131811,-548582.946084,5809549.0,5408.326913,2


In [123]:
distances_to_italian_restaurant = []

for area_x, area_y in zip(xs, ys):
    min_distance = 10000
    for res in italian_restaurants.values():
        res_x = res[7]
        res_y = res[8]
        d = calc_xy_distance(area_x, area_y, res_x, res_y)
        if d<min_distance:
            min_distance = d
    distances_to_italian_restaurant.append(min_distance)

df_locations['Distance to Italian restaurant'] = distances_to_italian_restaurant

In [124]:
df_locations.head(20)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area,Distance to Italian restaurant
0,"10 Scrutton Close, London, SW12 0AW",51.447233,-0.134452,-548882.946084,5809030.0,5992.495307,0,1023.394283
1,"London, SW2 5",51.448335,-0.126118,-548282.946084,5809030.0,5840.3767,3,1615.934228
2,"68 Elm Park, London, SW2 2TZ",51.449435,-0.117783,-547682.946084,5809030.0,5747.173218,1,1528.942918
3,"23 Brockwell Park Row, London, SW2 2YJ",51.450536,-0.109447,-547082.946084,5809030.0,5715.767665,0,1355.775841
4,"39 Norwood Road, London, SE24 9QG",51.451635,-0.101111,-546482.946084,5809030.0,5747.173218,3,1077.761891
5,"90 Delawyk Crescent, London, SE24 9JD",51.452734,-0.092774,-545882.946084,5809030.0,5840.3767,1,528.184522
6,"5 Great Spilmans, London, SE22 8SZ",51.453833,-0.084437,-545282.946084,5809030.0,5992.495307,0,341.155514
7,"8 Clarence Mews, London, SW12 9SR",51.45009,-0.148481,-549782.946084,5809549.0,5855.766389,3,333.957673
8,"Oaklands Estate, London, SW4 8AH",51.451193,-0.140146,-549182.946084,5809549.0,5604.462508,0,771.644541
9,"147 Clarence Crescent, London, SW4 8LN",51.452294,-0.131811,-548582.946084,5809549.0,5408.326913,2,1190.348424


In [125]:
print('Average distance to closest Italian restaurant from each area center:', df_locations['Distance to Italian restaurant'].mean())

Average distance to closest Italian restaurant from each area center: 421.3015967921711


In [126]:
restaurant_latlons = [[res[2], res[3]] for res in restaurants.values()]

italian_latlons = [[res[2], res[3]] for res in italian_restaurants.values()]

In [132]:
from folium import plugins
from folium.plugins import HeatMap

map_show = folium.Map(location=center, zoom_start=13)
folium.TileLayer('cartodbpositron').add_to(map_show) #cartodbpositron cartodbdark_matter
HeatMap(restaurant_latlons).add_to(map_show)
folium.Marker(center).add_to(map_show)
folium.Circle(center, radius=1000, fill=False, color='white').add_to(map_show)
folium.Circle(center, radius=2000, fill=False, color='white').add_to(map_show)
folium.Circle(center, radius=3000, fill=False, color='white').add_to(map_show)
map_show

In [134]:
map_show = folium.Map(location=center, zoom_start=13)
folium.TileLayer('cartodbpositron').add_to(map_show) #cartodbpositron cartodbdark_matter
HeatMap(italian_latlons).add_to(map_show)
folium.Marker(center).add_to(map_show)
folium.Circle(center, radius=1000, fill=False, color='white').add_to(map_show)
folium.Circle(center, radius=2000, fill=False, color='white').add_to(map_show)
folium.Circle(center, radius=3000, fill=False, color='white').add_to(map_show)

<folium.features.Circle at 0x1925cdda5c8>

In [135]:
map_show

In [137]:
roi_x_min = center_x + 1000
roi_y_max = center_y
roi_width = 5000
roi_height = 5000
roi_center_x = roi_x_min + 250
roi_center_y = roi_y_max - 250
roi_center_lon, roi_center_lat = xy_to_lonlat(roi_center_x, roi_center_y)
roi_center = [roi_center_lat, roi_center_lon]

map_show = folium.Map(location=roi_center, zoom_start=14)
HeatMap(restaurant_latlons).add_to(map_show)
folium.Marker(center).add_to(map_show)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.4).add_to(map_show)
map_show

In [138]:
k = math.sqrt(3) / 2 # Vertical offset for hexagonal grid cells
x_step = 100
y_step = 100 * k 
roi_y_min = roi_center_y - 2500

roi_latitudes = []
roi_longitudes = []
roi_xs = []
roi_ys = []
for i in range(0, int(51/k)):
    y = roi_y_min + i * y_step
    x_offset = 50 if i%2==0 else 0
    for j in range(0, 51):
        x = roi_x_min + j * x_step + x_offset
        d = calc_xy_distance(roi_center_x, roi_center_y, x, y)
        if (d <= 2501):
            lon, lat = xy_to_lonlat(x, y)
            roi_latitudes.append(lat)
            roi_longitudes.append(lon)
            roi_xs.append(x)
            roi_ys.append(y)

print(len(roi_latitudes), 'candidate neighborhood centers generated.')

1292 candidate neighborhood centers generated.


In [139]:
def count_restaurants_nearby(x, y, restaurants, radius=250):    
    count = 0
    for res in restaurants.values():
        res_x = res[7]; res_y = res[8]
        d = calc_xy_distance(x, y, res_x, res_y)
        if d<=radius:
            count += 1
    return count

def find_nearest_restaurant(x, y, restaurants):
    d_min = 100000
    for res in restaurants.values():
        res_x = res[7]; res_y = res[8]
        d = calc_xy_distance(x, y, res_x, res_y)
        if d<=d_min:
            d_min = d
    return d_min

roi_restaurant_counts = []
roi_italian_distances = []

print('Generating data on location candidates... ', end='')
for x, y in zip(roi_xs, roi_ys):
    count = count_restaurants_nearby(x, y, restaurants, radius=250)
    roi_restaurant_counts.append(count)
    distance = find_nearest_restaurant(x, y, italian_restaurants)
    roi_italian_distances.append(distance)
print('done.')

Generating data on location candidates... done.


In [140]:
# Let's put this into dataframe
df_roi_locations = pd.DataFrame({'Latitude':roi_latitudes,
                                 'Longitude':roi_longitudes,
                                 'X':roi_xs,
                                 'Y':roi_ys,
                                 'Restaurants nearby':roi_restaurant_counts,
                                 'Distance to Italian restaurant':roi_italian_distances})

df_roi_locations.head(10)

Unnamed: 0,Latitude,Longitude,X,Y,Restaurants nearby,Distance to Italian restaurant
0,51.478572,-0.100778,-545832.946084,5811996.0,2,841.761585
1,51.478866,-0.104508,-546082.946084,5812082.0,1,994.928059
2,51.479049,-0.103118,-545982.946084,5812082.0,3,908.790951
3,51.479232,-0.101727,-545882.946084,5812082.0,3,825.784562
4,51.479415,-0.100337,-545782.946084,5812082.0,3,746.95334
5,51.479599,-0.098947,-545682.946084,5812082.0,1,673.764382
6,51.479782,-0.097557,-545582.946084,5812082.0,1,608.257834
7,51.479965,-0.096166,-545482.946084,5812082.0,2,553.169723
8,51.480148,-0.094776,-545382.946084,5812082.0,2,511.87488
9,51.480332,-0.093386,-545282.946084,5812082.0,2,487.888351


In [141]:
good_res_count = np.array((df_roi_locations['Restaurants nearby']<=2))
print('Locations with no more than two restaurants nearby:', good_res_count.sum())

good_ita_distance = np.array(df_roi_locations['Distance to Italian restaurant']>=400)
print('Locations with no Italian restaurants within 400m:', good_ita_distance.sum())

good_locations = np.logical_and(good_res_count, good_ita_distance)
print('Locations with both conditions met:', good_locations.sum())

df_good_locations = df_roi_locations[good_locations]


Locations with no more than two restaurants nearby: 426
Locations with no Italian restaurants within 400m: 307
Locations with both conditions met: 201


In [143]:
good_latitudes = df_good_locations['Latitude'].values
good_longitudes = df_good_locations['Longitude'].values

good_locations = [[lat, lon] for lat, lon in zip(good_latitudes, good_longitudes)]

map_berlin = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_show)
HeatMap(restaurant_latlons).add_to(map_show)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.6).add_to(map_show)
folium.Marker(center).add_to(map_show)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_show) 
map_show

In [145]:
map_show = folium.Map(location=roi_center, zoom_start=14)
HeatMap(good_locations, radius=25).add_to(map_show)
folium.Marker(center).add_to(map_show)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_show)
map_show

In [146]:
from sklearn.cluster import KMeans

number_of_clusters = 15

good_xys = df_good_locations[['X', 'Y']].values
kmeans = KMeans(n_clusters=number_of_clusters, random_state=0).fit(good_xys)

cluster_centers = [xy_to_lonlat(cc[0], cc[1]) for cc in kmeans.cluster_centers_]

map_show = folium.Map(location=roi_center, zoom_start=14)
folium.TileLayer('cartodbpositron').add_to(map_show)
HeatMap(restaurant_latlons).add_to(map_show)
folium.Circle(roi_center, radius=2500, color='white', fill=True, fill_opacity=0.4).add_to(map_show)
folium.Marker(center).add_to(map_show)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=True, fill_opacity=0.25).add_to(map_show) 
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_show)
map_show

In [147]:
map_show = folium.Map(location=roi_center, zoom_start=14)
folium.Marker(center).add_to(map_show)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#00000000', fill=True, fill_color='#0066ff', fill_opacity=0.07).add_to(map_show)
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.CircleMarker([lat, lon], radius=2, color='blue', fill=True, fill_color='blue', fill_opacity=1).add_to(map_show)
for lon, lat in cluster_centers:
    folium.Circle([lat, lon], radius=500, color='green', fill=False).add_to(map_show) 
map_show

In [148]:
cluster_centers

[(-0.09048884077553003, 51.49563406668553),
 (-0.10125726163423696, 51.48060300952082),
 (-0.09088015787897573, 51.48924771965879),
 (-0.10343878378411592, 51.495563565889505),
 (-0.08190229096974987, 51.48698425345293),
 (-0.08548132017991426, 51.492279788471194),
 (-0.1033360000054715, 51.48369624514827),
 (-0.09982540995639497, 51.51011026652659),
 (-0.09967468614621637, 51.48712027785723),
 (-0.08608767116360576, 51.488314505322144),
 (-0.0913373785694883, 51.491965310928904),
 (-0.09369688307076922, 51.48086178060532),
 (-0.086186042301157, 51.484525967600874),
 (-0.07623757328250512, 51.49083534848955),
 (-0.09494506442716916, 51.4949042036112)]

In [157]:
candidate_area_addresses = []
print('==============================================================')
print('Addresses of centers of areas recommended for further analysis')
print('==============================================================\n')
for lon, lat in cluster_centers:
    addr = get_address(key, lat, lon)
    candidate_area_addresses.append(addr)    
    x, y = lonlat_to_xy(lon, lat)
    d = calc_xy_distance(x, y, center_x, center_y)
    print('{}{} => {:.1f}km from Lambeth'.format(addr, ' '*(50-len(addr)), d/1000))
    

Addresses of centers of areas recommended for further analysis

Deverell Street, London, SE1 4AA                   => 2.6km from Lambeth
126 Bethwin Road, London, SE5 0YY                  => 2.8km from Lambeth
Townley Street, London, SE17 1DZ                   => 2.8km from Lambeth
37 Saint George's Road, London, SE1 6ER            => 1.7km from Lambeth
London, SE17 2                                     => 3.5km from Lambeth
23 Townsend Street, London, SE17 1HY               => 3.0km from Lambeth
27 Forsyth Gardens, London, SE17 3NE               => 2.5km from Lambeth
Westminster to Greenwich Thames River Services, London => 2.2km from Lambeth
(7 - 5) Pasley Close, London, SE17 3JY             => 2.4km from Lambeth
23 Thurlow Street, London, SE17 2FY                => 3.1km from Lambeth
93 Balfour Street, London, SE17 1PB                => 2.6km from Lambeth
4 Addington Square, London, SE5 7JZ                => 3.2km from Lambeth
2 Villa Street, London, SE17 2DF                   => 3.

In [168]:
map_show = folium.Map(location=roi_center, zoom_start=14)
folium.Circle(center, radius=50, color='red', fill=True, fill_color='red', fill_opacity=1).add_to(map_show)
for lonlat, addr in zip(cluster_centers, candidate_area_addresses):
    folium.Marker([lonlat[0], lonlat[1]], popup=addr).add_to(map_show) 
for lat, lon in zip(good_latitudes, good_longitudes):
    folium.Circle([lat, lon], radius=250, color='#0000ff00', fill=True, fill_color='#0066ff', fill_opacity=0.05).add_to(map_show)
map_show