In [97]:
import pandas as pd
import requests
import json
from pyproj import Transformer
from math import radians, cos, sin, asin, sqrt
import os.path
import time

# Locations of the input/output data and of the Google Cloud credentials file.
datapath='../data/'
secret_file = '../secrets/gcloud.json'

# `origin` is the text sent to the API as the route origin and is also part of
# the output file name; `origin_english` is the name looked up in the places table.
#origin="Haifa Hof Hacarmel"
origin="כחל"
origin_english = 'Kahal'
MODE='driving'
#MODE='transit'
# Number of destinations sent per API request.
MAX_ELEM = 25

filename = f'{datapath}dist_{MODE}_{origin}.csv'
# Deliberate guard: the queries are billed, so refuse to run when results for
# this mode/origin combination were already saved.
assert not os.path.isfile(filename), "Data already exists, will not re-fetch it to reduce costs"

# The key is read from a file outside the notebook so it never appears in a cell.
with open(secret_file) as json_file:
    secrets = json.load(json_file)
api_key=secrets['api_key']

In [118]:
# Shared coordinate converter from EPSG:2039 to EPSG:4326. coords_to_lonlat
# passes it (X, Y) and unpacks the transformed point as (lat, lon).
transformer = Transformer.from_crs('epsg:2039', 'epsg:4326')

def get_distances(origin, destinations, api_key, mode=None, timeout=30):
    """
    Send one request to the Google Distance Matrix API.

    Parameters
    ----------
    origin : str or list of str
        Origin place(s); a list is joined with '|' into a single parameter.
    destinations : str or iterable of str
        Destination place(s); an iterable is joined with '|'. A single string
        is sent as-is instead of being split into characters.
    api_key : str
        API key sent as the `key` parameter.
    mode : str, optional
        Travel mode for the request. Defaults to the module-level `MODE`
        when omitted, which is what every existing caller relies on.
    timeout : float, optional
        Seconds to wait for the server before `requests` raises a timeout
        error, so a stalled connection cannot hang the notebook.

    Returns
    -------
    requests.Response
        The raw, unparsed response; callers decode it with `.json()`.
    """
    if isinstance(origin, list):
        origin = '|'.join(origin)
    if not isinstance(destinations, str):
        destinations = '|'.join(destinations)

    params = {
        'origins' : origin,
        'destinations' : destinations,
        'units' : 'metric',
        'mode' : MODE if mode is None else mode,
        'key' :  api_key
    }

    response = requests.get(
                    url = 'https://maps.googleapis.com/maps/api/distancematrix/json?',
                    params = params,
                    timeout = timeout,
                    )
    return response

def coords_to_lonlat(c):
    """
    Convert a packed 10-digit coordinate value into longitude/latitude.

    The value is rendered without decimals; its first five digits form X and
    its last five digits form Y, each with a trailing '0' appended before
    being passed to the module-level `transformer`.

    Returns a tuple (lon, lat, 'lat,lon' string with four decimals), or
    (None, None, None) when the input is NaN. Raises AssertionError when the
    rendered value is not exactly 10 characters long.
    """
    digits = format(c, '.0f')
    if digits == 'nan':
        return None, None, None

    assert len(digits) == 10, "Input string must be 10 characters long"
    x_part, y_part = digits[:5], digits[5:]
    X = int(x_part + '0')
    Y = int(y_part + '0')

    # The transformer result is unpacked as (lat, lon), then reordered for the caller.
    lat, lon = transformer.transform(X, Y)
    text = f'{lat:.4f},{lon:.4f}'
    return lon, lat, text

def haversine(lon1, lat1, lon2, lat2, radius=6371):
    """
    Calculate the great circle distance between two points
    on a sphere (specified in decimal degrees).

    Parameters
    ----------
    lon1, lat1 : float
        Longitude and latitude of the first point, in decimal degrees.
    lon2, lat2 : float
        Longitude and latitude of the second point, in decimal degrees.
    radius : float, optional
        Sphere radius; the result is in the same unit. The default 6371 is
        the Earth's radius in kilometers (use 3956 for miles).

    Returns
    -------
    float
        Distance along the sphere's surface. NaN inputs yield NaN.
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))

    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points, which
    # would make asin raise a domain error. The comparison is False for NaN,
    # so NaN still propagates to the result.
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    return c * radius

In [104]:
# source: https://www.cbs.gov.il/he/publications/Pages/2019/יישובים-בישראל.aspx
# Load the places table and give the Hebrew column headers used below short English names.
places = pd.read_excel(datapath + 'places-cbs.xlsx').rename(
    columns={
        'שם יישוב באנגלית' : 'name',
        'סך הכל אוכלוסייה 2021' : 'pop',
        'קואורדינטות' : 'coords',
    }
)

# Expand each packed coordinate into three columns: lon, lat and the 'lat,lon' query string.
places[['lon', 'lat', 'lonlat']] = places['coords'].apply(
    lambda packed: pd.Series(coords_to_lonlat(packed))
)

# Locate the origin by its English name; fail early if it is not in the table.
origin_query = places.query('name == @origin_english')
if origin_query.empty:
    raise ValueError(f'No place name in the file found for {origin_english}')
origin_coords = origin_query['coords'].iloc[0]

# Destinations: places inside the population band, renumbered from 0 so that
# positions and index labels coincide.
dests = places.query('pop > 100 and pop < 1000').copy().reset_index(drop=True)
print(f'{len(dests)} destinations')
orig_lon, orig_lat, orig_lonlat = coords_to_lonlat(origin_coords)

698 destinations


In [106]:
# Result columns are created empty and filled one batch at a time below.
dests['distance'] = None
dests['duration'] = None
dests['g_orig'] = None
dests['g_dest'] = None
dests['crow_dist'] = None
dests['mode'] = MODE

s_idx = 0
dlist = dests['lonlat'].to_list()
while s_idx < len(dlist):
    print(s_idx, end='...')
    # Each request carries at most MAX_ELEM destinations; `r` holds the row
    # labels of this batch (dests has a 0..n-1 index).
    e_idx = min(s_idx+MAX_ELEM, len(dests))
    r = range(s_idx, e_idx)
    dests_call = dlist[s_idx:e_idx]
    response = get_distances(origin, dests_call, api_key)
    time.sleep(0.5)
    matrix = response.json()

    # A request-level failure (denied key, quota, invalid request) comes back
    # with a non-OK top-level status and no rows; report it explicitly rather
    # than failing later with an IndexError.
    api_status = matrix.get('status', 'OK')
    if api_status != 'OK':
        raise ValueError(
            f"Distance Matrix request failed with status {api_status}: "
            f"{matrix.get('error_message', 'no error message')}"
        )

    DESTINATIONS=matrix['destination_addresses']
    ORIGIN=matrix['origin_addresses'][0]
    matrix = matrix['rows'][0]['elements']
    if len(matrix) != len(dests_call):
        raise ValueError(f'Expected {len(dests_call)} elements in the response, got {len(matrix)}')

    o_origin, o_dest, o_distance, o_duration, o_crow = [], [], [], [], []

    num_failed = 0
    for i, element in enumerate(matrix):
        o_origin.append(ORIGIN)
        o_dest.append(DESTINATIONS[i])
        # Straight-line distance: the haversine result (km) scaled by 1000.
        o_crow.append(1000.*haversine(orig_lon, orig_lat, dests.loc[s_idx+i, 'lon'], dests.loc[s_idx+i, 'lat']))
        if element['status'] == 'OK':
            o_distance.append(element['distance']['value'])
            o_duration.append(element['duration']['value'])
        else:
            # Keep the row but leave route values empty for unresolved destinations.
            num_failed += 1
            o_distance.append(None)
            o_duration.append(None)
    # More than 10% failures in a batch is treated as a problem with the origin itself.
    if num_failed/len(dests_call) > 0.1:
        raise ValueError(f'Too many failed calls: {num_failed}/{len(dests_call)}, check the origin place name')

    dests.loc[r, 'distance'] = o_distance
    dests.loc[r, 'duration'] = o_duration
    dests.loc[r, 'g_orig'] = o_origin
    dests.loc[r, 'g_dest'] = o_dest
    dests.loc[r, 'crow_dist'] = o_crow

    s_idx += MAX_ELEM


0...25...50...75...100...125...150...175...200...225...250...275...300...325...350...375...400...425...450...475...500...525...550...575...600...625...650...675...

In [133]:
def batch_get_distances(places, origin_idx, destination_idxs, api_key, mode='driving', max_elem=25):
    """
    Fetch route distance and duration from one origin to many destinations.

    Destinations are sent to `get_distances` in batches of at most `max_elem`,
    with a short pause after each request.

    Parameters
    ----------
    places : pandas.DataFrame
        Table with at least the columns 'lonlat', 'lat', 'lon' and 'name'.
    origin_idx : index label
        Label of the origin row in `places`.
    destination_idxs : sequence of index labels
        Labels of the destination rows in `places`. May be empty.
    api_key : str
        API key forwarded to `get_distances`.
    mode : str
        Value stored in the 'mode' column of the result. NOTE: it is not
        passed on to `get_distances`, which is called without a mode argument,
        so keep it in sync with the mode that function actually uses.
    max_elem : int
        Maximum number of destinations per request.

    Returns
    -------
    pandas.DataFrame
        One row per destination with the columns g_orig, g_dest, crow_dist,
        mode, origin_idx, dest_idx, orig_name, dest_name, distance, duration,
        plus speed (crow_dist / duration * 3.6) and ratio
        (crow_dist / distance). distance and duration are the API's `value`
        fields and are empty for destinations whose element status is not OK.
        An empty frame with the same columns is returned when there are no
        destinations.

    Raises
    ------
    RuntimeError
        If the API reports a non-OK status for a whole request.
    """
    result_columns = ['g_orig', 'g_dest', 'crow_dist', 'mode', 'origin_idx', 'dest_idx',
                      'orig_name', 'dest_name', 'distance', 'duration']
    results = []

    origin = places.loc[origin_idx, 'lonlat']
    origin_lat, origin_lon, origin_name = places.loc[origin_idx, ['lat', 'lon', 'name']]

    for s_idx in range(0, len(destination_idxs), max_elem):
        print(s_idx, end='...')

        batch_dest_idxs = destination_idxs[s_idx:s_idx + max_elem]
        destinations = places.loc[batch_dest_idxs, 'lonlat'].tolist()

        response = get_distances(origin, destinations, api_key)
        time.sleep(0.25)
        matrix = response.json()

        # A request-level failure (denied key, quota, invalid request) has a
        # non-OK top-level status and no rows; surface it explicitly rather
        # than failing below with an IndexError.
        api_status = matrix.get('status', 'OK')
        if api_status != 'OK':
            raise RuntimeError(
                f"Distance Matrix request failed with status {api_status}: "
                f"{matrix.get('error_message', 'no error message')}"
            )

        DESTINATIONS = matrix['destination_addresses']
        ORIGIN = matrix['origin_addresses'][0]
        elements = matrix['rows'][0]['elements']

        for j, element in enumerate(elements):
            dest_idx = batch_dest_idxs[j]
            dest_lat, dest_lon, dest_name = places.loc[dest_idx, ['lat', 'lon', 'name']]

            # Route values stay empty when this particular destination failed.
            succeeded = element['status'] == 'OK'
            results.append({
                'g_orig': ORIGIN,
                'g_dest': DESTINATIONS[j],
                # Straight-line distance: the haversine result (km) scaled by 1000.
                'crow_dist': 1000 * haversine(origin_lon, origin_lat, dest_lon, dest_lat),
                'mode': mode,
                'origin_idx': origin_idx,
                'dest_idx': dest_idx,
                'orig_name': origin_name,
                'dest_name': dest_name,
                'distance': element['distance']['value'] if succeeded else None,
                'duration': element['duration']['value'] if succeeded else None,
            })

    # With no rows, DataFrame(results) would have no columns and the derived
    # columns below would raise KeyError; return an empty, fully shaped frame.
    if not results:
        return pd.DataFrame(columns=result_columns + ['speed', 'ratio'])

    ret = pd.DataFrame(results)
    ret['speed'] = ret['crow_dist']/ret['duration']*3.6
    ret['ratio'] = ret['crow_dist']/ret['distance']

    return ret

# Look the origin up by its English name; its index label identifies the row in `places`.
origin_query = places.query('name == @origin_english')
if len(origin_query) == 0:
    # The message must use origin_english: no `origin_name` exists at this
    # level, so referencing it would raise NameError instead of this error.
    raise ValueError(f'No place name in the file found for {origin_english}')
origin_idx = origin_query.index[0]
# Destinations for this run: index labels of places inside the population band.
destination_idxs = places.query('pop > 900 and pop < 1000').index.to_list()
results = batch_get_distances(places, origin_idx, destination_idxs, api_key, MODE, MAX_ELEM)

0...25...50...75...

In [134]:
# Display the per-destination table returned by batch_get_distances.
results


Unnamed: 0,g_orig,g_dest,crow_dist,mode,origin_idx,dest_idx,orig_name,dest_name,distance,duration,speed,ratio
0,"65, Kahal, Israel","108, Aderet, Israel",145135.585066,driving,664,32,Kahal,Adderet,203772,7785,67.114721,0.712245
1,"65, Kahal, Israel","93, Ahihud, Israel",31567.582518,driving,664,143,Kahal,Ahihud,40279,2255,50.396141,0.783723
2,"65, Kahal, Israel","Elyakim Center, Eliakim, Israel",50450.378037,driving,664,179,Kahal,Elyaqim,69129,2819,64.427585,0.729800
3,"65, Kahal, Israel","Arbel St 49, Eshhar, Israel",19606.937147,driving,664,213,Kahal,Eshhar,35230,2405,29.349261,0.556541
4,"65, Kahal, Israel","Ha-Rishonim St 30, Beit Herut, Israel",82795.502280,driving,664,262,Kahal,Bet Herut,110903,4446,67.040893,0.746558
...,...,...,...,...,...,...,...,...,...,...,...,...
73,"65, Kahal, Israel","ההר 72, Shamir, Israel",33698.695512,driving,664,1411,Kahal,Shamir,41938,2373,51.123179,0.803536
74,"65, Kahal, Israel","Haikarim St 71, Shafir, Israel",151721.063730,driving,664,1426,Kahal,Shafir,187649,7123,76.680588,0.808536
75,"65, Kahal, Israel","14, Talmei Elazar, Israel",70255.560293,driving,664,1457,Kahal,Talme El'azar,95868,3873,65.303387,0.732836
76,"65, Kahal, Israel","18, Tlamim, Israel",167252.769996,driving,664,1462,Kahal,Telamim,218524,8098,74.352923,0.765375


In [135]:
# Order by ratio (crow_dist / distance), ascending: destinations whose route is
# longest relative to the straight line come first.
results.sort_values(by='ratio')[['distance', 'duration', 'speed', 'crow_dist', 'orig_name', 'dest_name']]

Unnamed: 0,distance,duration,speed,crow_dist,orig_name,dest_name
51,46628,2419,34.267857,23026.095962,Kahal,Natur
7,40484,1874,40.146212,20898.333877,Kahal,Bet Rimmon
69,170798,6844,47.774616,90824.853309,Kahal,Rehelim
33,42775,2583,32.149560,23067.309004,Kahal,Yodefat
52,97307,4055,47.105722,53059.362010,Kahal,Nir Ezyon
...,...,...,...,...,...,...
74,187649,7123,76.680588,151721.063730,Kahal,Shafir
49,194063,7415,76.366931,157294.665741,Kahal,Negba
14,202714,8002,73.924549,164317.844415,Kahal,Ge'a
44,239638,8895,78.750222,194578.674492,Kahal,Maslul


In [55]:
# No visible cell adds a 'speed' column to `dests`, so derive it here with the
# same formula batch_get_distances uses (crow_dist / duration * 3.6). assign()
# works on a copy, leaving `dests` and the saved CSV unchanged; to_numeric
# turns the None placeholders of failed lookups into NaN.
dests_with_speed = dests.assign(
    speed=pd.to_numeric(dests['crow_dist']) / pd.to_numeric(dests['duration']) * 3.6
)
# The 20 slowest destinations among those closer than 15000 crow_dist units.
dests_with_speed.query('crow_dist < 15000').sort_values(by='speed')[['distance', 'duration', 'speed', 'crow_dist', 'name']].head(20)

Unnamed: 0,distance,duration,speed,crow_dist,name
306,86299,3846,5.835378,6234.128851,Kefar Gallim
197,87939,3980,6.763965,7477.939096,HaHoterim
73,84680,3896,7.465841,8079.699177,Bet Oren
301,65823,3098,8.748249,7528.35426,Kefar Bialik
94,97097,4117,8.755019,10012.336714,Bet Zevi
523,96579,4154,10.209472,11780.595772,En Hod
530,100218,4372,10.278262,12482.37784,Ein Hod
485,97505,4083,10.305967,11688.684028,Nir Ezyon
322,65899,2836,12.028844,9476.056328,Kefar Hasidim Bet
321,65792,2889,12.234254,9817.98867,Kefar Hasidim Alef


In [12]:
# Persist the fetched results. The configuration cell asserts that this file
# does not exist yet, so saved results stop the queries from being re-run.
dests.to_csv(filename, index=False)

In [108]:
def nearby_places(places, max_dist):
    """
    Find all pairs of places which are less than max_dist apart in aerial distance.

    Parameters
    ----------
    places : pandas.DataFrame
        Table with the columns 'lon', 'lat' and 'name'.
    max_dist : float
        Exclusive distance threshold, in the unit `haversine` returns
        (kilometers).

    Returns
    -------
    list of tuple
        (name, name) pairs, each pair listed once: the first place is the one
        with the smaller index label. Order follows the row order of `places`.
    """
    # Pull the needed values out once. Calling iterrows() in a nested loop
    # rebuilds a Series for every row on every pass of the outer loop.
    rows = list(zip(places.index, places['lon'], places['lat'], places['name']))

    pairs = []
    for i, lon1, lat1, name1 in rows:
        for j, lon2, lat2, name2 in rows:
            # `i < j` keeps each unordered pair once and skips self-pairs.
            if i < j and haversine(lon1, lat1, lon2, lat2) < max_dist:
                pairs.append((name1, name2))
    return pairs

In [115]:
# Pairs of places with pop < 1000 that lie less than 3 apart, measured in the
# unit haversine returns (kilometers).
nearby_pairs = nearby_places(places.query("pop < 1000"), 3)

In [116]:
# Display the (name, name) pairs found above.
nearby_pairs

[('Avtalyon', 'Hararit'),
 ("Avi'el", 'Allone Yizhaq'),
 ('Avivim', "Yir'on"),
 ('Avigedor', 'En Zurim'),
 ('Avigedor', 'Shafir'),
 ('Avigedor', 'Timmorim'),
 ('Avital', "Yizre'el"),
 ('Avital', 'Metav'),
 ('Avital', 'Perazon'),
 ("Avi'ezer", "Newe Mikha'el"),
 ('Even Menahem', 'Kefar Rozenwald(Zarit)'),
 ('Even Menahem', "Netu'a"),
 ('Even Menahem', 'Shomera'),
 ('Even Menahem', 'Shetula'),
 ('Even Sappir', "En Karem-B.S.Haqla'i"),
 ('Even Sappir', 'Zova'),
 ('Avshalom', 'Deqel'),
 ('Avshalom', 'Yevul'),
 ('Avshalom', 'Yated'),
 ('Avshalom', 'Sede Avraham'),
 ('Adora', 'Telem'),
 ('Addirim', 'Baraq'),
 ('Addirim', 'Gadish'),
 ('Addirim', 'Devora'),
 ('Addirim', 'Hever'),
 ('Addirim', 'Metav'),
 ('Addirim', 'Ram-On'),
 ('Adamit', "Ya'ara"),
 ('Adderet', "Newe Mikha'el"),
 ('Odem', 'El-Rom'),
 ('Ohad', 'Yesha'),
 ('Ohad', 'Mivtahim'),
 ('Ohad', "Ammi'oz"),
 ('Ohad', 'Zohar'),
 ('Ohad', 'Sede Nizzan'),
 ('Ohad', 'Talme Eliyyahu'),
 ('Omen', 'Gadish'),
 ('Omen', 'Devora'),
 ('Omen', 'Heve

In [117]:
# Number of nearby pairs found.
len(nearby_pairs)

777