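The Azure Maps subscription key is hidden: the cell that defines `key` has been removed, so define `key` before running the cells below.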

In [7]:
import requests, json
import pandas as pd
# Show full cell contents (e.g. long addresses) instead of truncating them.
# None means "no limit"; the former sentinel -1 was deprecated in pandas 1.0.
pd.set_option('display.max_colwidth', None)

# Free-text place name that is geocoded to obtain the map/grid center.
query = r'Bath, UK'

def get_coordinates(query, key):
    """Geocode a free-text place name with the Azure Maps fuzzy search API.

    Parameters
    ----------
    query : str
        Place description, e.g. ``'Bath, UK'``.
    key : str
        Azure Maps subscription key.

    Returns
    -------
    list
        ``[lat, lon]`` of the first search result whose ``type`` is
        ``'Geography'``.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status code.
    ValueError
        If the response contains no ``'Geography'`` result.
    """
    response = requests.get(
        'https://atlas.microsoft.com/search/fuzzy/json',
        # Let requests URL-encode the values; a query containing '&', '#' or
        # non-ASCII characters would otherwise corrupt the request.
        params={'api-version': '1.0', 'subscription-key': key, 'query': query},
        timeout=30,
    )
    response.raise_for_status()
    results = response.json()['results']

    # Pick the first Geography result by position in the list. (Filtering a
    # DataFrame and then indexing it with label 0 only works when the very
    # first result happens to be the Geography one.)
    position = next(
        (result['position'] for result in results if result.get('type') == 'Geography'),
        None,
    )
    if position is None:
        raise ValueError('No Geography result found for query {!r}'.format(query))
    return [position['lat'], position['lon']]

# Geocode the query once; `center` is [lat, lon] and is reused by every later cell.
# `key` (the Azure Maps subscription key) is not defined in the visible cells
# and must already exist in the kernel.
center = get_coordinates(query, key)

In [9]:
#!pip install shapely
import shapely.geometry

#!pip install pyproj
import pyproj

import math

def lonlat_to_xy(lon, lat, zone=33):
    """Project WGS84 longitude/latitude (degrees) to UTM easting/northing.

    Parameters
    ----------
    lon, lat : float
        Geographic coordinates in degrees.
    zone : int, optional
        UTM zone used for the projection. Defaults to 33, the value this
        notebook has always used, so existing results stay unchanged.
        NOTE(review): Bath (lon approx. -2.36) lies in UTM zone 30; using
        zone 33 is what produces the negative easting seen in the check
        output below. Pass the same zone to ``xy_to_lonlat`` when overriding.

    Returns
    -------
    tuple
        ``(x, y)`` in the chosen UTM zone.
    """
    proj_latlon = pyproj.Proj(proj='latlong', datum='WGS84')
    proj_xy = pyproj.Proj(proj='utm', zone=zone, datum='WGS84')
    # pyproj.transform() is deprecated; Transformer is its replacement.
    # always_xy=True keeps the (lon, lat) -> (x, y) argument order.
    transformer = pyproj.Transformer.from_proj(proj_latlon, proj_xy, always_xy=True)
    x, y = transformer.transform(lon, lat)
    return x, y

def xy_to_lonlat(x, y, zone=33):
    """Convert UTM easting/northing back to WGS84 longitude/latitude (degrees).

    Parameters
    ----------
    x, y : float
        UTM coordinates expressed in ``zone``.
    zone : int, optional
        UTM zone the coordinates are expressed in. Defaults to 33, the value
        this notebook has always used; it must match the zone that produced
        ``x`` and ``y``.

    Returns
    -------
    tuple
        ``(lon, lat)`` in degrees.
    """
    proj_latlon = pyproj.Proj(proj='latlong', datum='WGS84')
    proj_xy = pyproj.Proj(proj='utm', zone=zone, datum='WGS84')
    # pyproj.transform() is deprecated; Transformer is its replacement.
    # always_xy=True keeps the (x, y) -> (lon, lat) argument order.
    transformer = pyproj.Transformer.from_proj(proj_xy, proj_latlon, always_xy=True)
    lon, lat = transformer.transform(x, y)
    return lon, lat

def calc_xy_distance(x1, y1, x2, y2):
    """Return the Euclidean distance between points (x1, y1) and (x2, y2)."""
    # math.hypot avoids the overflow/underflow that squaring the deltas by
    # hand can cause for very large or very small coordinate differences.
    return math.hypot(x2 - x1, y2 - y1)

# Round-trip sanity check: project the city center to UTM and back again;
# the first and last printed lines should agree.
lonlat_line = 'Bath center longitude={}, latitude={}'

print('Coordinate transformation check')
print('-------------------------------')
print(lonlat_line.format(center[1], center[0]))  # center is [lat, lon]
x, y = lonlat_to_xy(center[1], center[0])
print('Bath center UTM X={}, Y={}'.format(x, y))
lo, la = xy_to_lonlat(x, y)
print(lonlat_line.format(lo, la))

Coordinate transformation check
-------------------------------
Bath center longitude=-2.36197, latitude=51.38488
Bath center UTM X=-703835.0271347496, Y=5837114.053623048
Bath center longitude=-2.36197, latitude=51.38488


In [42]:
# City center in Cartesian (UTM) coordinates
center_x, center_y = lonlat_to_xy(center[1], center[0])

# Hexagonal grid of candidate area centers: 21 columns spaced 600 apart,
# rows spaced 600*k apart with every other row shifted by half a column.
k = math.sqrt(3) / 2  # Vertical offset for hexagonal grid cells
n_rows = int(21/k)
x_min = center_x - 6000
x_step = 600
y_min = center_y - 6000 - (n_rows*k*600 - 12000)/2
y_step = 600 * k

latitudes, longitudes = [], []
distances_from_center = []
xs, ys = [], []
for row in range(n_rows):
    y = y_min + row * y_step
    x_offset = 0 if row % 2 else 300  # even rows are shifted by half a step
    for col in range(21):
        x = x_min + col * x_step + x_offset
        distance_from_center = calc_xy_distance(center_x, center_y, x, y)
        # Keep only grid points within 6001 of the center (a circular study area).
        if (distance_from_center <= 6001):
            lon, lat = xy_to_lonlat(x, y)
            xs.append(x)
            ys.append(y)
            latitudes.append(lat)
            longitudes.append(lon)
            distances_from_center.append(distance_from_center)

print(len(latitudes), 'candidate neighborhood centers generated.')

364 candidate neighborhood centers generated.


In [61]:
#!pip install folium

import folium

# Map centered on the geocoded city center, with one 300 m circle per
# candidate area center.
map_show = folium.Map(location=center, zoom_start=13)
folium.Marker(center, popup='Roman Bath').add_to(map_show)
for grid_lat, grid_lon in zip(latitudes, longitudes):
    candidate_area = folium.Circle([grid_lat, grid_lon], radius=300, color='blue', fill=False)
    candidate_area.add_to(map_show)
map_show

In [62]:
def get_address(key, latitude, longitude):
    """Reverse-geocode a coordinate with the Azure Maps reverse address API.

    Parameters
    ----------
    key : str
        Azure Maps subscription key.
    latitude, longitude : float
        Coordinate to look up, in degrees.

    Returns
    -------
    str or None
        The ``freeformAddress`` of the first result, or ``None`` when the
        request fails or the response holds no usable address.
    """
    try:
        response = requests.get(
            'https://atlas.microsoft.com/search/address/reverse/json',
            params={
                'api-version': '1.0',
                'subscription-key': key,
                'query': '{},{}'.format(latitude, longitude),
            },
            timeout=30,  # never hang the whole address loop on one request
        )
        return response.json()['addresses'][0]['address']['freeformAddress']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Best effort: network problems, non-JSON bodies and responses without
        # an address all mean "no address". Anything else (KeyboardInterrupt,
        # programming errors) is allowed to propagate.
        return None

# Sanity check: reverse-geocode the city center and show the resulting address.
addr = get_address(key, center[0], center[1])
for line in ('Reverse geocoding check',
             '-----------------------',
             'Address of [{}, {}] is: {}'.format(center[0], center[1], addr)):
    print(line)

Reverse geocoding check
-----------------------
Address of [51.38488, -2.36197] is: 23 Milsom Street, Bath, BA1 1DA


In [63]:
# Reverse-geocode every candidate area center (one request per point),
# printing a dot per lookup as a progress indicator.
print('Obtaining location addresses: ', end='')
addresses = []
for point_lat, point_lon in zip(latitudes, longitudes):
    found = get_address(key, point_lat, point_lon)
    # Failed lookups are kept as a placeholder so the list stays aligned
    # with latitudes/longitudes.
    addresses.append('NO ADDRESS' if found is None else found)
    print(' .', end='')
print(' done.')

Obtaining location addresses:  . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . done.


In [64]:
# Spot-check a slice of the reverse-geocoded addresses.
addresses[150:170]

['Sally in the Wood, Bath, BA1 8',
 'Bradford-on-Avon, BA15 2',
 '74 Monkton Farleigh, Bradford-on-Avon, BA15 2',
 'Bath, BA2 9JA',
 'Bath, BA2 9',
 'Bath, BA2 9JE',
 'Bath, BA2 9',
 '54 Rosewarn Close, Bath, BA2 1PB',
 '7 Cotswold View, Bath, BA2 1HA',
 '48 Dartmouth Avenue, Bath, BA2 1AS',
 '5 Arlington Road, Bath, BA2 3PG',
 '138 Lower Bristol Road, Bath, BA2 3BE',
 '4 Beau Street, Bath, BA1 1QY',
 'Bath, BA2 4',
 'Chatham Park, Bath, BA2 6JR',
 'Golf Course Road, Bath, BA2 6',
 'Bath, BA2 7',
 'Warminster Road, Bath, BA2 6XL',
 'Warleigh Lane, Bath, BA1 8ED',
 'Bradford Road, Bath, BA1 8EA']

In [65]:
# One row per candidate area center: address, geographic and UTM
# coordinates, and distance from the city center.
location_columns = [
    ('Address', addresses),
    ('Latitude', latitudes),
    ('Longitude', longitudes),
    ('X', xs),
    ('Y', ys),
    ('Distance from center', distances_from_center),
]
df_locations = pd.DataFrame(dict(location_columns))

df_locations.head(10)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center
0,"Bath, BA2 8",51.332056,-2.367505,-705635.027135,5831398.0,5992.495307
1,"Bath, BA2 8",51.333313,-2.359286,-705035.027135,5831398.0,5840.3767
2,"Bath, BA2 8QY",51.334569,-2.351065,-704435.027135,5831398.0,5747.173218
3,"Midford Hill, Bath, BA2 7",51.335826,-2.342845,-703835.027135,5831398.0,5715.767665
4,"Bath, BA2 7",51.337081,-2.334623,-703235.027135,5831398.0,5747.173218
5,"Pipehouse, Bath, BA2 7PN",51.338336,-2.326401,-702635.027135,5831398.0,5840.3767
6,"Bath, BA2 7UN",51.33959,-2.318179,-702035.027135,5831398.0,5992.495307
7,"Bath, BA2 7",51.334628,-2.381573,-706535.027135,5831918.0,5855.766389
8,"Tucking Mill Lane, Bath, BA2 7EE",51.335886,-2.373354,-705935.027135,5831918.0,5604.462508
9,"Bath, BA2 8",51.337144,-2.365134,-705335.027135,5831918.0,5408.326913


In [66]:
# Persist the candidate locations table next to the notebook.
df_locations.to_pickle('./locations.pkl')   

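The Foursquare client ID and client secret are hidden: the cell that defines `client_id` and `client_secret` has been removed, so define both before running the cells below.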

In [68]:
# Foursquare category ID sent as `categoryId` when querying venues.
food_category = '4d4b7105d754a06374d81259' # 'Root' category for all food-related venues

# Category IDs passed to is_restaurant() as `specific_filter`; a venue carrying
# any of them is flagged as Italian.
# NOTE(review): presumably the Foursquare "Italian Restaurant" category and its
# sub-categories - verify against the Foursquare category list.
italian_restaurant_categories = ['4bf58dd8d48988d110941735','55a5a1ebe4b013909087cbb6','55a5a1ebe4b013909087cb7c',
                                 '55a5a1ebe4b013909087cba7','55a5a1ebe4b013909087cba1','55a5a1ebe4b013909087cba4',
                                 '55a5a1ebe4b013909087cb95','55a5a1ebe4b013909087cb89','55a5a1ebe4b013909087cb9b',
                                 '55a5a1ebe4b013909087cb98','55a5a1ebe4b013909087cbbf','55a5a1ebe4b013909087cb79',
                                 '55a5a1ebe4b013909087cbb0','55a5a1ebe4b013909087cbb3','55a5a1ebe4b013909087cb74',
                                 '55a5a1ebe4b013909087cbaa','55a5a1ebe4b013909087cb83','55a5a1ebe4b013909087cb8c',
                                 '55a5a1ebe4b013909087cb92','55a5a1ebe4b013909087cb8f','55a5a1ebe4b013909087cb86',
                                 '55a5a1ebe4b013909087cbb9','55a5a1ebe4b013909087cb7f','55a5a1ebe4b013909087cbbc',
                                 '55a5a1ebe4b013909087cb9e','55a5a1ebe4b013909087cbc2','55a5a1ebe4b013909087cbad']

def is_restaurant(categories, specific_filter=None):
    """Classify a venue from its ``(name, id)`` category pairs.

    Categories are processed in order. A name containing one of the
    restaurant keywords marks the venue as a restaurant, a name containing
    'fast food' clears that mark again, and an id found in
    ``specific_filter`` marks the venue as both restaurant and specific.

    Returns a ``(restaurant, specific)`` tuple of booleans.
    """
    keywords = ('restaurant', 'diner', 'taverna', 'steakhouse')
    found_restaurant = False
    found_specific = False
    for entry in categories:
        name, identifier = entry[0].lower(), entry[1]
        if any(word in name for word in keywords):
            found_restaurant = True
        if 'fast food' in name:
            found_restaurant = False
        if specific_filter is not None and identifier in specific_filter:
            found_specific = found_restaurant = True
    return found_restaurant, found_specific

def get_categories(categories):
    """Reduce category records to ``(name, id)`` tuples, keeping their order."""
    pairs = []
    for record in categories:
        pairs.append((record['name'], record['id']))
    return pairs

def format_address(location):
    """Join the entries of ``location['formattedAddress']`` into one comma-separated string."""
    address_lines = location['formattedAddress']
    return ', '.join(address_lines)

def get_venues_near_location(lat, lon, category, client_id, client_secret, radius=500, limit=100):
    """Fetch venues of one category around a point from Foursquare's explore endpoint.

    Parameters
    ----------
    lat, lon : float
        Center of the search, in degrees.
    category : str
        Foursquare category id sent as ``categoryId``.
    client_id, client_secret : str
        Foursquare API credentials.
    radius : int, optional
        Search radius sent to the API.
    limit : int, optional
        Maximum number of venues requested.

    Returns
    -------
    list of tuple
        One ``(id, name, categories, (lat, lng), address, distance)`` tuple
        per venue, where ``categories`` comes from ``get_categories`` and
        ``address`` from ``format_address``. An empty list is returned when
        the request fails or the response lacks the expected structure.
    """
    version = '20180724'
    params = {
        'client_id': client_id,
        'client_secret': client_secret,
        'v': version,
        'll': '{},{}'.format(lat, lon),
        'categoryId': category,
        'radius': radius,
        'limit': limit,
    }
    try:
        # Let requests build and encode the query string instead of
        # formatting credentials and parameters into the URL by hand.
        response = requests.get('https://api.foursquare.com/v2/venues/explore',
                                params=params, timeout=30)
        results = response.json()['response']['groups'][0]['items']
        venues = [(item['venue']['id'],
                   item['venue']['name'],
                   get_categories(item['venue']['categories']),
                   (item['venue']['location']['lat'], item['venue']['location']['lng']),
                   format_address(item['venue']['location']),
                   item['venue']['location']['distance']) for item in results]
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        # Best effort: a failed request or an unexpected payload yields no
        # venues for this point. Other errors (KeyboardInterrupt, bugs) are
        # no longer hidden.
        venues = []
    return venues

In [69]:
# Let's now go over our neighborhood locations and get nearby restaurants; we'll also maintain a dictionary of all found restaurants and all found italian restaurants

import pickle

def get_restaurants(lats, lons):
    """Collect restaurants around every candidate location.

    For each ``(lat, lon)`` pair the food venues within 350 of the point are
    fetched and filtered with ``is_restaurant``.

    Returns a tuple ``(restaurants, italian_restaurants, location_restaurants)``:
    two dictionaries keyed by venue id (all restaurants / Italian ones only)
    and a list, parallel to the inputs, holding the restaurants reported at a
    distance of at most 300 from each location. Every restaurant is a tuple
    ``(id, name, lat, lon, address, distance, is_italian, x, y)``.
    """
    restaurants = {}
    italian_restaurants = {}
    location_restaurants = []

    print('Obtaining venues around candidate locations:', end='')
    for lat, lon in zip(lats, lons):
        # radius=350 makes neighbouring search areas overlap so no restaurant
        # is missed; the dictionaries keyed by venue id remove the duplicates
        # that the overlaps produce.
        venues = get_venues_near_location(lat, lon, food_category, client_id, client_secret, radius=350, limit=100)
        nearby = []
        for venue in venues:
            venue_id, venue_name, venue_categories = venue[0], venue[1], venue[2]
            venue_latlon, venue_address, venue_distance = venue[3], venue[4], venue[5]
            is_res, is_italian = is_restaurant(venue_categories, specific_filter=italian_restaurant_categories)
            if not is_res:
                continue
            x, y = lonlat_to_xy(venue_latlon[1], venue_latlon[0])
            restaurant = (venue_id, venue_name, venue_latlon[0], venue_latlon[1], venue_address, venue_distance, is_italian, x, y)
            # Only venues inside the 300 circle count towards this location.
            if venue_distance<=300:
                nearby.append(restaurant)
            restaurants[venue_id] = restaurant
            if is_italian:
                italian_restaurants[venue_id] = restaurant
        location_restaurants.append(nearby)
        print(' .', end='')
    print(' done.')
    return restaurants, italian_restaurants, location_restaurants

# Try to load from local file system in case we did this before.
# NOTE: pickle.load can execute arbitrary code, so only load cache files that
# this notebook wrote itself.
restaurants = {}
italian_restaurants = {}
location_restaurants = []
loaded = False
try:
    with open('restaurants_350.pkl', 'rb') as f:
        restaurants = pickle.load(f)
    with open('italian_restaurants_350.pkl', 'rb') as f:
        italian_restaurants = pickle.load(f)
    with open('location_restaurants_350.pkl', 'rb') as f:
        location_restaurants = pickle.load(f)
    print('Restaurant data loaded.')
    loaded = True
except (OSError, EOFError, pickle.UnpicklingError) as err:
    # A missing, unreadable or corrupt cache is expected on a first run; say
    # so and fall back to the API. Any other error is a real problem and is
    # allowed to surface instead of being silently ignored.
    print('Restaurant cache not used ({}: {}).'.format(type(err).__name__, err))

# If load failed use the Foursquare API to get the data
if not loaded:
    restaurants, italian_restaurants, location_restaurants = get_restaurants(latitudes, longitudes)

    # Let's persist this in the local file system
    with open('restaurants_350.pkl', 'wb') as f:
        pickle.dump(restaurants, f)
    with open('italian_restaurants_350.pkl', 'wb') as f:
        pickle.dump(italian_restaurants, f)
    with open('location_restaurants_350.pkl', 'wb') as f:
        pickle.dump(location_restaurants, f)
        

Restaurant data loaded.


In [70]:
import numpy as np

# Headline numbers for the collected restaurant data.
restaurant_total = len(restaurants)
italian_total = len(italian_restaurants)

print('Total number of restaurants:', restaurant_total)
print('Total number of Italian restaurants:', italian_total)
print('Percentage of Italian restaurants: {:.2f}%'.format(italian_total / restaurant_total * 100))
print('Average number of restaurants in neighborhood:', np.array([len(r) for r in location_restaurants]).mean())

Total number of restaurants: 87
Total number of Italian restaurants: 12
Percentage of Italian restaurants: 13.79%
Average number of restaurants in neighborhood: 0.21428571428571427


In [71]:
# Preview the first five restaurant records and the overall count.
print('List of all restaurants')
print('-----------------------')
restaurant_preview = list(restaurants.values())[:5]
for restaurant_record in restaurant_preview:
    print(restaurant_record)
print('...')
print('Total:', len(restaurants))

List of all restaurants
-----------------------
('4c8cca99cf3ea143a633f851', 'Wheatsheaf', 51.3390657886206, -2.3801008236643, 'United Kingdom', 95, False, -706314.3142863151, 5832381.160454403)
('53f0fa86498e4bb2e61fadab', 'Pizzeria', 51.35301971435547, -2.376837730407715, 'United Kingdom', 253, False, -705714.5310931047, 5833860.678559744)
('50c3b319e4b0e8ae08fcd270', 'The Georgian Room', 51.36067478023836, -2.3283155235319386, 'Brassknocker Hill (Monkton Combe), Bath, Bath and North East Somerset, BA2 7HU, United Kingdom', 195, False, -702168.2245962364, 5833887.057190771)
('50a2af8fe4b0473b2bb6cee6', 'Curry Garden', 51.3570670754563, -2.381474057757438, '41 Upper Bloomfield Road, Bath, Bath and North East Somerset, Ba2 2RY, United Kingdom', 312, False, -705924.9464073204, 5834383.781548536)
('519d0b37498e574bafc08fc4', 'Fortune Cookie Chinese Takeaway', 51.360043, -2.372844, 'Bath, Bath and North East Somerset, United Kingdom', 217, False, -705250.801006096, 5834565.782543887)
...


In [72]:
# Preview the first five Italian restaurant records and the overall count.
print('List of Italian restaurants')
print('---------------------------')
italian_preview = list(italian_restaurants.values())[:5]
for italian_record in italian_preview:
    print(italian_record)
print('...')
print('Total:', len(italian_restaurants))

List of Italian restaurants
---------------------------
('4bd42e79a8b3a593185b6b5f', 'Grounded', 51.37630725448867, -2.3735397075463966, 'Bristol,  Bs3, United Kingdom', 311, True, -704861.5400296557, 5836365.947003131)
('51c34a1d498eb7a74e441661', 'Prezzo', 51.377890284203566, -2.357964750830706, 'The Vaults, 6-7 Brunel Sq, Bath, Bath and North East Somerset, BA1 1SX, United Kingdom', 288, True, -703747.170458632, 5836278.1222277135)
('4f259508e4b0e12082cf23cb', 'Sotto Sotto', 51.380801837924736, -2.3565904553672548, '10 North Parade, Bath, Bath and North East Somerset, BA2 4AL, United Kingdom', 333, True, -703574.4055299596, 5836575.189499536)
('4e0620927d8b2a0180523f52', 'Joya', 51.38254613023818, -2.3578279863661566, '6 Newmarket Row, Bath, Bath and North East Somerset, BA2 4AN, United Kingdom', 329, True, -703612.7158578844, 5836787.795197384)
('4bae882bf964a52084bd3be3', 'Mezzaluna', 51.38295789895659, -2.346750388967374, 'United Kingdom', 206, True, -702839.4094480155, 5836646.9

In [73]:
# Show up to eight restaurant names for candidate locations 101-110
# (list indices 100-109).
print('Restaurants around location')
print('---------------------------')
for i in range(100, 110):
    names = ', '.join(r[1] for r in location_restaurants[i][:8])
    print('Restaurants around location {}: {}'.format(i+1, names))

Restaurants around location
---------------------------
Restaurants around location 101: 
Restaurants around location 102: Lido Chef
Restaurants around location 103: 
Restaurants around location 104: 
Restaurants around location 105: 
Restaurants around location 106: 
Restaurants around location 107: 
Restaurants around location 108: 
Restaurants around location 109: 
Restaurants around location 110: 


In [74]:
# Map of every collected restaurant: Italian ones in red, all others in blue.
# Record layout: res[2]/res[3] are latitude/longitude, res[6] the Italian flag.
map_show = folium.Map(location=center, zoom_start=13)
folium.Marker(center, popup='Roman Bath').add_to(map_show)
for res in restaurants.values():
    marker_color = 'red' if res[6] else 'blue'
    folium.CircleMarker(
        [res[2], res[3]],
        radius=3,
        color=marker_color,
        fill=True,
        fill_color=marker_color,
        fill_opacity=1,
    ).add_to(map_show)
map_show

In [76]:
# Number of restaurants found within 300 m of each candidate location,
# added to the locations table as a new column.
location_restaurants_count = list(map(len, location_restaurants))
df_locations['Restaurants in area'] = location_restaurants_count

average_per_area = np.array(location_restaurants_count).mean()
print('Average number of restaurants in every area with radius=300m:', average_per_area)

df_locations.head(20)

Average number of restaurants in every area with radius=300m: 0.21428571428571427


Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area
0,"Bath, BA2 8",51.332056,-2.367505,-705635.027135,5831398.0,5992.495307,0
1,"Bath, BA2 8",51.333313,-2.359286,-705035.027135,5831398.0,5840.3767,0
2,"Bath, BA2 8QY",51.334569,-2.351065,-704435.027135,5831398.0,5747.173218,0
3,"Midford Hill, Bath, BA2 7",51.335826,-2.342845,-703835.027135,5831398.0,5715.767665,0
4,"Bath, BA2 7",51.337081,-2.334623,-703235.027135,5831398.0,5747.173218,0
5,"Pipehouse, Bath, BA2 7PN",51.338336,-2.326401,-702635.027135,5831398.0,5840.3767,0
6,"Bath, BA2 7UN",51.33959,-2.318179,-702035.027135,5831398.0,5992.495307,0
7,"Bath, BA2 7",51.334628,-2.381573,-706535.027135,5831918.0,5855.766389,0
8,"Tucking Mill Lane, Bath, BA2 7EE",51.335886,-2.373354,-705935.027135,5831918.0,5604.462508,0
9,"Bath, BA2 8",51.337144,-2.365134,-705335.027135,5831918.0,5408.326913,0


In [77]:
# Distance from every candidate area center to its closest Italian restaurant,
# measured in the projected X/Y plane.
distances_to_italian_restaurant = []

for area_x, area_y in zip(xs, ys):
    # The leading 10000 acts as a cap: it is the result when no Italian
    # restaurant is closer than that (or when there are none at all).
    candidates = [10000]
    candidates.extend(
        calc_xy_distance(area_x, area_y, res[7], res[8])
        for res in italian_restaurants.values()
    )
    distances_to_italian_restaurant.append(min(candidates))

df_locations['Distance to Italian restaurant'] = distances_to_italian_restaurant

In [79]:
# Preview the locations table, now including the restaurant count and the
# distance to the closest Italian restaurant.
df_locations.head(20)

Unnamed: 0,Address,Latitude,Longitude,X,Y,Distance from center,Restaurants in area,Distance to Italian restaurant
0,"Bath, BA2 8",51.332056,-2.367505,-705635.027135,5831398.0,5992.495307,0,5027.518131
1,"Bath, BA2 8",51.333313,-2.359286,-705035.027135,5831398.0,5840.3767,0,4970.689493
2,"Bath, BA2 8QY",51.334569,-2.351065,-704435.027135,5831398.0,5747.173218,0,4928.077599
3,"Midford Hill, Bath, BA2 7",51.335826,-2.342845,-703835.027135,5831398.0,5715.767665,0,4880.627092
4,"Bath, BA2 7",51.337081,-2.334623,-703235.027135,5831398.0,5747.173218,0,4906.637627
5,"Pipehouse, Bath, BA2 7PN",51.338336,-2.326401,-702635.027135,5831398.0,5840.3767,0,5004.964015
6,"Bath, BA2 7UN",51.33959,-2.318179,-702035.027135,5831398.0,5992.495307,0,5171.483035
7,"Bath, BA2 7",51.334628,-2.381573,-706535.027135,5831918.0,5855.766389,0,4752.43838
8,"Tucking Mill Lane, Bath, BA2 7EE",51.335886,-2.373354,-705935.027135,5831918.0,5604.462508,0,4575.749778
9,"Bath, BA2 8",51.337144,-2.365134,-705335.027135,5831918.0,5408.326913,0,4473.175774


In [80]:
# Mean of the per-location distance to the closest Italian restaurant
# (same units as the X/Y columns).
print('Average distance to closest Italian restaurant from each area center:', df_locations['Distance to Italian restaurant'].mean())

Average distance to closest Italian restaurant from each area center: 2929.7218001432566
