# Pair distances, Ohio/Abroad

7 Aug 2021

In [1]:
%matplotlib inline

from itertools import combinations_with_replacement
import logging
import math
import sys
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

In [2]:
logger = logging.getLogger("pair_distances")
logger.setLevel(logging.DEBUG)
# protect against duplicating this handler on notebook rerun
if len(logger.handlers) < 1:
    logger.addHandler(logging.StreamHandler(stream=sys.stdout))

In [3]:
# input of all desired ohio/abroad pairs (tab-separated)
df_in = pd.read_csv("../ohio_abroad.csv", sep="\t")
df_in.head()

Unnamed: 0,ohio,abroad
0,Antwerp,"Antwerp, Belgium"
1,Toronto,"Toronto, Ontario, Canada"
2,Truro Township,"Truro, Nova Scotia, Canada"
3,Canton,"Guangzhou, China"
4,Cairo,"Cairo, Egypt"


In [4]:
# input of all known locations
df_loc = pd.read_csv("../ohio_abroad_locs.csv")
df_loc.head()

Unnamed: 0,ohio,abroad,ohio_lon,ohio_lat,abroad_lon,abroad_lat
0,Antwerp,"Antwerp, Belgium",-84.740514,41.181439,4.399708,51.22111
1,Toronto,"Toronto, Ontario, Canada",-80.600906,40.464233,-79.383935,43.653482
2,Truro Township,"Truro, Nova Scotia, Canada",-82.806986,39.955209,-63.300006,45.366668
3,Canton,"Guangzhou, China",-81.374951,40.798546,113.259294,23.130196
4,Cairo,"Cairo, Egypt",-84.08198,40.833128,31.235726,30.044388


Functions to get locations

In [5]:
def get_lon_lat(place_name):
    osm_url = f"https://nominatim.openstreetmap.org/search?q={place_name}&format=json"
    logger.debug(f"GET request to {osm_url}")
    r = requests.get(osm_url)
    places = r.json()
    if len(places) > 0:
        top_place = r.json()[0]
    else:
        top_place = {}
    lat = top_place.get("lat")
    lon = top_place.get("lon")
    return lon, lat

def distance(origin, destination):
    lat1, lon1 = origin
    lat2, lon2 = destination
    radius = 6371 # km

    dlat = math.radians(lat2-lat1)
    dlon = math.radians(lon2-lon1)
    a = math.sin(dlat/2) * math.sin(dlat/2) + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * math.sin(dlon/2) * math.sin(dlon/2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    d = radius * c

    return d

In [6]:
def get_ohio_lon_lat_series(ohio_place_names: pd.Series):
    """wrap get_lon_lat for Series of Ohio place names"""
    full_names = ohio_place_names + ", Ohio, United States"
    lons = []
    lats = []
    for full_name in full_names:
        lon, lat = get_lon_lat(full_name)
        lons.append(lon)
        lats.append(lat)
        time.sleep(0.2)
    return lons, lats

In [7]:
df_loc.columns

Index(['ohio', 'abroad', 'ohio_lon', 'ohio_lat', 'abroad_lon', 'abroad_lat'], dtype='object')

In [8]:
df_loc.dtypes

ohio           object
abroad         object
ohio_lon      float64
ohio_lat      float64
abroad_lon    float64
abroad_lat    float64
dtype: object

In [9]:
df_in.dtypes

ohio      object
abroad    object
dtype: object

Get existing locations and 

In [10]:
df = pd.merge(df_in, df_loc, how="left", on=["ohio", "abroad"])
df

Unnamed: 0,ohio,abroad,ohio_lon,ohio_lat,abroad_lon,abroad_lat
0,Antwerp,"Antwerp, Belgium",-84.740514,41.181439,4.399708,51.22111
1,Toronto,"Toronto, Ontario, Canada",-80.600906,40.464233,-79.383935,43.653482
2,Truro Township,"Truro, Nova Scotia, Canada",-82.806986,39.955209,-63.300006,45.366668
3,Canton,"Guangzhou, China",-81.374951,40.798546,113.259294,23.130196
4,Cairo,"Cairo, Egypt",-84.08198,40.833128,31.235726,30.044388
5,Belfort,"Belfort, France",-81.245385,40.812281,6.862894,47.63796
6,Pyrmont,"Bad Pyrmont, Germany",-84.460782,39.811998,9.243464,51.98505
7,Bonn,"Bonn, Germany",-81.385397,39.531463,7.10066,50.735851
8,Glandorf,"Glandorf, Germany",-84.079119,41.028996,8.003365,52.081578
9,Leipsic,"Leipzig, Germany",-83.984666,41.098385,12.374733,51.340632


In [11]:
need_update_abroad = df.loc[(df.abroad_lon.isnull()) | (df.abroad_lat.isnull()), "abroad"]
need_update_abroad

Series([], Name: abroad, dtype: object)

In [12]:
need_update_ohio = df.loc[(df.ohio_lon.isnull()) | (df.ohio_lat.isnull()), "ohio"]
need_update_ohio

Series([], Name: ohio, dtype: object)

In [13]:
new_lons, new_lats = get_ohio_lon_lat_series(need_update_ohio)

In [14]:
new_lons_abroad, new_lats_abroad = [], []
for loc in need_update_abroad:
    lon, lat = get_lon_lat(loc)
    new_lons_abroad.append(lon)
    new_lats_abroad.append(lat)

Update

In [15]:
df.loc[df.abroad_lon.isnull(), "abroad_lon"] = new_lons_abroad

In [16]:
df.loc[df.abroad_lat.isnull(), "abroad_lat"] = new_lats_abroad
df.loc[df.ohio_lon.isnull(), "ohio_lon"] = new_lons
df.loc[df.ohio_lat.isnull(), "ohio_lat"] = new_lats

In [17]:
df[df.ohio.duplicated(keep=False)]

Unnamed: 0,ohio,abroad,ohio_lon,ohio_lat,abroad_lon,abroad_lat


In [18]:
df.sort_values("ohio")

Unnamed: 0,ohio,abroad,ohio_lon,ohio_lat,abroad_lon,abroad_lat
22,Amsterdam,"Amsterdam, Netherlands",-80.922867,40.473676,4.893604,52.37276
34,Antrim,"Antrim, United Kingdom",-81.358447,40.120347,-6.219204,54.715139
0,Antwerp,"Antwerp, Belgium",-84.740514,41.181439,4.399708,51.22111
12,Athens,"Athens, Greece",-82.101248,39.328924,23.728305,37.983941
5,Belfort,"Belfort, France",-81.245385,40.812281,6.862894,47.63796
35,Berea,"Veria, Greece",-81.854303,41.366161,22.203683,40.521534
30,Bexley,"Bexley, United Kingdom",-82.936864,39.969238,0.150488,51.441679
7,Bonn,"Bonn, Germany",-81.385397,39.531463,7.10066,50.735851
25,Cadiz,"Cadiz, Spain",-80.996763,40.272845,-6.292898,36.529744
4,Cairo,"Cairo, Egypt",-84.08198,40.833128,31.235726,30.044388


Update cached locations

In [19]:
df.to_csv("../ohio_abroad_locs.csv", index=False)

Output: pairwise distances.

Also include coordinates to enable plotting locations.

In [20]:
!head ../pairs.csv

first_ohio_name,first_ohio_lon,first_ohio_lat,second_ohio_name,second_ohio_lon,second_ohio_lat,dist_ohio,first_abroad_name,first_abroad_lon,first_abroad_lat,second_abroad_name,second_abroad_lon,second_abroad_lat,dist_abroad,ratio
Amsterdam,-80.9228671,40.4736758,Amsterdam,-80.9228671,40.4736758,0.0,"Amsterdam, Netherlands",4.8936041,52.3727598,"Amsterdam, Netherlands",4.8936041,52.3727598,0.0,
Amsterdam,-80.9228671,40.4736758,Antrim,-81.3584466,40.1203474,53.92759307798537,"Amsterdam, Netherlands",4.8936041,52.3727598,"Antrim, United Kingdom",-6.2192038,54.715139,778.0656491738075,0.06930982383716416
Amsterdam,-80.9228671,40.4736758,Antwerp,-84.7405144,41.1814388,330.68263863561356,"Amsterdam, Netherlands",4.8936041,52.3727598,"Antwerp, Belgium",4.3997081,51.2211097,132.48440674373552,2.4960117704663367
Amsterdam,-80.9228671,40.4736758,Athens,-82.1012479,39.3289242,162.19098331141774,"Amsterdam, Netherlands",4.8936041,52.3727598,"Athens, Greece",23.7283052,37.9839412,2164.028648659992,

In [21]:
pairs = []
for i, j in combinations_with_replacement(df.sort_values("ohio").index, 2):
    first_row = df.loc[i]
    second_row = df.loc[j]
    pairs.append({
        "first_ohio_name": first_row.ohio,
        "first_ohio_lon": first_row.ohio_lon,
        "first_ohio_lat": first_row.ohio_lat,
        "second_ohio_name": second_row.ohio,
        "second_ohio_lon": second_row.ohio_lon,
        "second_ohio_lat": second_row.ohio_lat,
        "dist_ohio": distance((first_row.ohio_lat, first_row.ohio_lon), (second_row.ohio_lat, second_row.ohio_lon)),
        "first_abroad_name": first_row.abroad,
        "first_abroad_lon": first_row.abroad_lon,
        "first_abroad_lat": first_row.abroad_lat,
        "second_abroad_name": second_row.abroad,
        "second_abroad_lon": second_row.abroad_lon,
        "second_abroad_lat": second_row.abroad_lat,
        "dist_abroad": distance((first_row.abroad_lat, first_row.abroad_lon), (second_row.abroad_lat, second_row.abroad_lon))
    })
df_pairs = pd.DataFrame.from_records(pairs)
df_pairs["ratio"] = df_pairs["dist_ohio"] / df_pairs["dist_abroad"]
df_pairs

Unnamed: 0,first_ohio_name,first_ohio_lon,first_ohio_lat,second_ohio_name,second_ohio_lon,second_ohio_lat,dist_ohio,first_abroad_name,first_abroad_lon,first_abroad_lat,second_abroad_name,second_abroad_lon,second_abroad_lat,dist_abroad,ratio
0,Amsterdam,-80.922867,40.473676,Amsterdam,-80.922867,40.473676,0.000000,"Amsterdam, Netherlands",4.893604,52.372760,"Amsterdam, Netherlands",4.893604,52.372760,0.000000,
1,Amsterdam,-80.922867,40.473676,Antrim,-81.358447,40.120347,53.927593,"Amsterdam, Netherlands",4.893604,52.372760,"Antrim, United Kingdom",-6.219204,54.715139,778.065649,0.069310
2,Amsterdam,-80.922867,40.473676,Antwerp,-84.740514,41.181439,330.682639,"Amsterdam, Netherlands",4.893604,52.372760,"Antwerp, Belgium",4.399708,51.221110,132.484407,2.496012
3,Amsterdam,-80.922867,40.473676,Athens,-82.101248,39.328924,162.190983,"Amsterdam, Netherlands",4.893604,52.372760,"Athens, Greece",23.728305,37.983941,2164.028649,0.074949
4,Amsterdam,-80.922867,40.473676,Belfort,-81.245385,40.812281,46.455181,"Amsterdam, Netherlands",4.893604,52.372760,"Belfort, France",6.862894,47.637960,544.916628,0.085252
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1171,Vienna,-80.664243,41.237834,Warsaw,-82.006906,40.335337,151.154636,"Vienna, Austria",16.372504,48.208354,"Warsaw, Poland",21.006725,52.231958,555.523683,0.272094
1172,Vienna,-80.664243,41.237834,Winesburg,-81.695128,40.616450,110.790798,"Vienna, Austria",16.372504,48.208354,"Weinsberg, Germany",9.286547,49.150304,530.471088,0.208854
1173,Warsaw,-82.006906,40.335337,Warsaw,-82.006906,40.335337,0.000000,"Warsaw, Poland",21.006725,52.231958,"Warsaw, Poland",21.006725,52.231958,0.000000,
1174,Warsaw,-82.006906,40.335337,Winesburg,-81.695128,40.616450,40.896450,"Warsaw, Poland",21.006725,52.231958,"Weinsberg, Germany",9.286547,49.150304,892.580649,0.045818


In [22]:
df_pairs.to_csv("../pairs.csv", index=False)

In [23]:
same_place = df_pairs.first_ohio_name == df_pairs.second_ohio_name
df_app = df_pairs[~same_place]
df_app.head()

Unnamed: 0,first_ohio_name,first_ohio_lon,first_ohio_lat,second_ohio_name,second_ohio_lon,second_ohio_lat,dist_ohio,first_abroad_name,first_abroad_lon,first_abroad_lat,second_abroad_name,second_abroad_lon,second_abroad_lat,dist_abroad,ratio
1,Amsterdam,-80.922867,40.473676,Antrim,-81.358447,40.120347,53.927593,"Amsterdam, Netherlands",4.893604,52.37276,"Antrim, United Kingdom",-6.219204,54.715139,778.065649,0.06931
2,Amsterdam,-80.922867,40.473676,Antwerp,-84.740514,41.181439,330.682639,"Amsterdam, Netherlands",4.893604,52.37276,"Antwerp, Belgium",4.399708,51.22111,132.484407,2.496012
3,Amsterdam,-80.922867,40.473676,Athens,-82.101248,39.328924,162.190983,"Amsterdam, Netherlands",4.893604,52.37276,"Athens, Greece",23.728305,37.983941,2164.028649,0.074949
4,Amsterdam,-80.922867,40.473676,Belfort,-81.245385,40.812281,46.455181,"Amsterdam, Netherlands",4.893604,52.37276,"Belfort, France",6.862894,47.63796,544.916628,0.085252
5,Amsterdam,-80.922867,40.473676,Berea,-81.854303,41.366161,126.383746,"Amsterdam, Netherlands",4.893604,52.37276,"Veria, Greece",22.203683,40.521534,1860.453183,0.067932


Reshape to nested to make easier to work with in json

In [24]:
def nest_record(d: dict) -> dict:
    """Take flat dict and nest by area (ohio/abroad) and then location (first/second)"""
    nested = {"ratio": d["ratio"]}
    areas = ["ohio", "abroad"]
    for area in areas:
        area_dict = {
            "dist": d[f"dist_{area}"]
        }
        locs = [f"first_{area}", f"second_{area}"]
        area_locs = []
        for loc in locs:
            area_locs.append({
                    "name": d[f"{loc}_name"],
                    "lon": d[f"{loc}_lon"],
                    "lat": d[f"{loc}_lat"]
            })
        area_dict.update({"locations": area_locs})
        nested.update({area: area_dict})
    return nested

In [25]:
nested_records = [nest_record(d) for d in df_app.to_dict("records")]

pd.DataFrame(nested_records)

Unnamed: 0,ratio,ohio,abroad
0,0.069310,"{'dist': 53.92759307798537, 'locations': [{'na...","{'dist': 778.0656491738075, 'locations': [{'na..."
1,2.496012,"{'dist': 330.68263863561356, 'locations': [{'n...","{'dist': 132.48440674373552, 'locations': [{'n..."
2,0.074949,"{'dist': 162.19098331141774, 'locations': [{'n...","{'dist': 2164.028648659992, 'locations': [{'na..."
3,0.085252,"{'dist': 46.455181214645066, 'locations': [{'n...","{'dist': 544.9166281606758, 'locations': [{'na..."
4,0.067932,"{'dist': 126.38374560908052, 'locations': [{'n...","{'dist': 1860.45318275349, 'locations': [{'nam..."
...,...,...,...
1123,0.152154,"{'dist': 210.53878598369243, 'locations': [{'n...","{'dist': 1383.7176930229075, 'locations': [{'n..."
1124,0.458483,"{'dist': 240.14428116296685, 'locations': [{'n...","{'dist': 523.7803505649325, 'locations': [{'na..."
1125,0.272094,"{'dist': 151.1546360891575, 'locations': [{'na...","{'dist': 555.5236828605172, 'locations': [{'na..."
1126,0.208854,"{'dist': 110.79079843286347, 'locations': [{'n...","{'dist': 530.4710884517987, 'locations': [{'na..."


Go through dataframe out of convenience for using .to_json()...

In [26]:
pd.DataFrame(nested_records).to_json("../app/site/pairs.json", orient="records")

In [27]:
!head -c500 ../app/site/pairs.json

[{"ratio":0.0693098238,"ohio":{"dist":53.927593078,"locations":[{"name":"Amsterdam","lon":-80.9228671,"lat":40.4736758},{"name":"Antrim","lon":-81.3584466,"lat":40.1203474}]},"abroad":{"dist":778.0656491738,"locations":[{"name":"Amsterdam, Netherlands","lon":4.8936041,"lat":52.3727598},{"name":"Antrim, United Kingdom","lon":-6.2192038,"lat":54.715139}]}},{"ratio":2.4960117705,"ohio":{"dist":330.6826386356,"locations":[{"name":"Amsterdam","lon":-80.9228671,"lat":40.4736758},{"name":"Antwerp","lon

For quiz, make subset of nested_records for which ratio is no smaller than 0.5 (i.e., Ohio distance is no less than 2x smaller than abroad).

In [28]:
nested_df = pd.DataFrame(nested_records)

In [29]:
nested_close = nested_df[nested_df.ratio >= 0.5]
len(nested_close)

84

Also include at least one pair with all locations, so they appear in quiz.

In [30]:
in_close = set(nested_close.ohio.apply(lambda x: x.get("locations")[0]["name"]).tolist() + 
   nested_close.ohio.apply(lambda x: x.get("locations")[1]["name"]).tolist())

In [31]:
len(in_close)

34

In [32]:
in_nested = set(nested_df.ohio.apply(lambda x: x.get("locations")[0]["name"]).tolist() + 
   nested_df.ohio.apply(lambda x: x.get("locations")[1]["name"]).tolist())

In [33]:
left_out = in_nested.difference(in_close)
left_out

{'Cairo',
 'Calcutta',
 'Canton',
 'Damascus',
 'Delhi',
 'Lima',
 'Matamoras',
 'Mecca Township',
 'Medina',
 'Mogadore',
 'Parral',
 'Toronto',
 'Truro Township',
 'Warsaw'}

Iteratively add remaining missing cities

In [34]:
from copy import deepcopy

nested_out = deepcopy(nested_close)

in_out = set(nested_out.ohio.apply(lambda x: x.get("locations")[0]["name"]).tolist() + 
   nested_out.ohio.apply(lambda x: x.get("locations")[1]["name"]).tolist())
in_nested = set(nested_df.ohio.apply(lambda x: x.get("locations")[0]["name"]).tolist() + 
   nested_df.ohio.apply(lambda x: x.get("locations")[1]["name"]).tolist())
left_out = in_nested.difference(in_out)


while len(left_out) > 0:
    to_add = list(left_out)[0]
    # find shortest distance available
    print(to_add)
    closest_pair = df_app[(df_app.first_ohio_name == to_add) | (df_app.second_ohio_name == to_add)].sort_values("dist_abroad").iloc[0:1]
    nested_out = nested_out.append(nest_record(closest_pair.to_dict("records")[0]), ignore_index=True)
    # updated which are still left out
    in_out = set(nested_out.ohio.apply(lambda x: x.get("locations")[0]["name"]).tolist() + 
       nested_out.ohio.apply(lambda x: x.get("locations")[1]["name"]).tolist())
    in_nested = set(nested_df.ohio.apply(lambda x: x.get("locations")[0]["name"]).tolist() + 
       nested_df.ohio.apply(lambda x: x.get("locations")[1]["name"]).tolist())
    left_out = in_nested.difference(in_out)

Parral
Canton
Warsaw
Toronto
Mogadore
Cairo
Medina
Lima
Delhi


In [35]:
nested_out.to_json("../app/site/pairs_close.json", orient="records")

In [36]:
!head -c500 ../app/site/pairs_close.json

[{"ratio":2.4960117705,"ohio":{"dist":330.6826386356,"locations":[{"name":"Amsterdam","lon":-80.9228671,"lat":40.4736758},{"name":"Antwerp","lon":-84.7405144,"lat":41.1814388}]},"abroad":{"dist":132.4844067437,"locations":[{"name":"Amsterdam, Netherlands","lon":4.8936041,"lat":52.3727598},{"name":"Antwerp, Belgium","lon":4.3997081,"lat":51.2211097}]}},{"ratio":0.5271417013,"ohio":{"dist":179.954632925,"locations":[{"name":"Amsterdam","lon":-80.9228671,"lat":40.4736758},{"name":"Bexley","lon":-82

## Pairs closest to equal distance.

Absolute distance

In [37]:
np.abs(df_app["dist_ohio"] - df_app["dist_abroad"]).sort_values().head(10)

64       2.526028
886      4.900828
881      4.951547
681      5.676635
990      6.413337
922      8.996867
36       9.911830
386     10.692827
1039    13.365331
287     15.382972
dtype: float64

In [38]:
df_app.loc[np.abs(df_app["dist_ohio"] - df_app["dist_abroad"]).sort_values().head(10).index][["first_ohio_name", "second_ohio_name", "first_abroad_name", "second_abroad_name", "dist_ohio", "dist_abroad"]]

Unnamed: 0,first_ohio_name,second_ohio_name,first_abroad_name,second_abroad_name,dist_ohio,dist_abroad
64,Antrim,Dublin,"Antrim, United Kingdom","Dublin, Ireland",149.320371,151.8464
886,Mantua,Parma,"Mantua, Italy","Parma, Italy",44.090076,48.990903
881,Mantua,Milan,"Mantua, Italy","Milano, Italy",115.428072,120.379619
681,Dublin,East Liverpool,"Dublin, Ireland","Liverpool, United Kingdom",222.559771,216.883135
990,Milan,Padua,"Milano, Italy","Padua, Italy",203.015078,209.428416
922,Maria Stein,Winesburg,"Metzerlen-Mariastein, Switzerland","Weinsberg, Germany",237.675102,228.678235
36,Amsterdam,Pyrmont,"Amsterdam, Netherlands","Bad Pyrmont, Germany",309.574324,299.662494
386,Cadiz,Seville,"Cadiz, Spain","Seville, Spain",109.788434,99.095607
1039,New Riegel,Winesburg,"Riegel, Germany","Weinsberg, Germany",144.887817,158.253148
287,Bexley,Kimbolton,"Bexley, United Kingdom","Kimbolton, United Kingdom",117.929407,102.546435


Ratio distance

In [39]:
df_pairs.sort_values(by="ratio", key= lambda col: np.abs(col - 1)).head(30)

Unnamed: 0,first_ohio_name,first_ohio_lon,first_ohio_lat,second_ohio_name,second_ohio_lon,second_ohio_lat,dist_ohio,first_abroad_name,first_abroad_lon,first_abroad_lat,second_abroad_name,second_abroad_lon,second_abroad_lat,dist_abroad,ratio
64,Antrim,-81.358447,40.120347,Dublin,-83.114077,40.099229,149.320371,"Antrim, United Kingdom",-6.219204,54.715139,"Dublin, Ireland",-6.260273,53.349764,151.8464,0.983365
681,Dublin,-83.114077,40.099229,East Liverpool,-80.577293,40.618676,222.559771,"Dublin, Ireland",-6.260273,53.349764,"Liverpool, United Kingdom",-2.99168,53.407199,216.883135,1.026174
990,Milan,-82.605455,41.297552,Padua,-84.783852,40.508379,203.015078,"Milano, Italy",9.1905,45.4668,"Padua, Italy",11.873446,45.407717,209.428416,0.969377
36,Amsterdam,-80.922867,40.473676,Pyrmont,-84.460782,39.811998,309.574324,"Amsterdam, Netherlands",4.893604,52.37276,"Bad Pyrmont, Germany",9.243464,51.98505,299.662494,1.033077
922,Maria Stein,-84.493286,40.407825,Winesburg,-81.695128,40.61645,237.675102,"Metzerlen-Mariastein, Switzerland",7.490015,47.476261,"Weinsberg, Germany",9.286547,49.150304,228.678235,1.039343
881,Mantua,-81.223991,41.283944,Milan,-82.605455,41.297552,115.428072,"Mantua, Italy",10.670837,45.169263,"Milano, Italy",9.1905,45.4668,120.379619,0.958867
1039,New Riegel,-83.318534,41.051444,Winesburg,-81.695128,40.61645,144.887817,"Riegel, Germany",7.740185,48.159257,"Weinsberg, Germany",9.286547,49.150304,158.253148,0.915545
886,Mantua,-81.223991,41.283944,Parma,-81.735569,41.38224,44.090076,"Mantua, Italy",10.670837,45.169263,"Parma, Italy",10.328083,44.801368,48.990903,0.899965
386,Cadiz,-80.996763,40.272845,Seville,-81.86236,41.010055,109.788434,"Cadiz, Spain",-6.292898,36.529744,"Seville, Spain",-5.99534,37.38863,99.095607,1.107904
280,Bexley,-82.936864,39.969238,Cheviot,-84.613279,39.157003,169.72648,"Bexley, United Kingdom",0.150488,51.441679,"Cheviot Hills, United Kingdom",-2.108786,52.570844,199.18121,0.852121


## Furthest in Ohio vs abroad

In [40]:
df_pairs.sort_values(by="ratio", ascending=False).head(20)

Unnamed: 0,first_ohio_name,first_ohio_lon,first_ohio_lat,second_ohio_name,second_ohio_lon,second_ohio_lat,dist_ohio,first_abroad_name,first_abroad_lon,first_abroad_lat,second_abroad_name,second_abroad_lon,second_abroad_lat,dist_abroad,ratio
207,Belfort,-81.245385,40.812281,Maria Stein,-84.493286,40.407825,277.816667,"Belfort, France",6.862894,47.63796,"Metzerlen-Mariastein, Switzerland",7.490015,47.476261,50.377133,5.514738
551,Cheviot,-84.613279,39.157003,East Liverpool,-80.577293,40.618676,380.727312,"Cheviot Hills, United Kingdom",-2.108786,52.570844,"Liverpool, United Kingdom",-2.99168,53.407199,110.185075,3.455344
1032,New Riegel,-83.318534,41.051444,Strasburg,-81.526786,40.594784,159.083773,"Riegel, Germany",7.740185,48.159257,"Strasbourg, France",7.750713,48.584614,47.303971,3.363011
782,Kimbolton,-81.572065,40.152849,Oxford,-84.742052,39.510305,279.937808,"Kimbolton, United Kingdom",-0.381399,52.303459,"Oxford, United Kingdom",-1.25785,51.752013,85.762941,3.264088
885,Mantua,-81.223991,41.283944,Padua,-84.783852,40.508379,311.366186,"Mantua, Italy",10.670837,45.169263,"Padua, Italy",11.873446,45.407717,97.743771,3.185535
732,East Liverpool,-80.577293,40.618676,Stockport,-81.792912,39.548406,157.662919,"Liverpool, United Kingdom",-2.99168,53.407199,"Stockport, United Kingdom",-2.160243,53.407901,55.111892,2.860779
572,Cheviot,-84.613279,39.157003,Stockport,-81.792912,39.548406,246.364983,"Cheviot Hills, United Kingdom",-2.108786,52.570844,"Stockport, United Kingdom",-2.160243,53.407901,93.140168,2.645099
1060,Padua,-84.783852,40.508379,Ravenna,-81.242047,41.157557,306.573794,"Padua, Italy",11.873446,45.407717,"Ravenna, Italy",12.059009,44.364061,116.966416,2.621041
2,Amsterdam,-80.922867,40.473676,Antwerp,-84.740514,41.181439,330.682639,"Amsterdam, Netherlands",4.893604,52.37276,"Antwerp, Belgium",4.399708,51.22111,132.484407,2.496012
553,Cheviot,-84.613279,39.157003,Kimbolton,-81.572065,40.152849,282.905008,"Cheviot Hills, United Kingdom",-2.108786,52.570844,"Kimbolton, United Kingdom",-0.381399,52.303459,120.808334,2.341767


## Furthest abroad vs Ohio

In [41]:
df_pairs.sort_values(by="ratio", ascending=True).head(20)

Unnamed: 0,first_ohio_name,first_ohio_lon,first_ohio_lat,second_ohio_name,second_ohio_lon,second_ohio_lat,dist_ohio,first_abroad_name,first_abroad_lon,first_abroad_lat,second_abroad_name,second_abroad_lon,second_abroad_lat,dist_abroad,ratio
1090,Parral,-81.497062,40.561174,Strasburg,-81.526786,40.594784,4.502116,"Parral, Mexico",-93.002907,16.36923,"Strasbourg, France",7.750713,48.584614,9414.663638,0.000478
409,Cairo,-84.08198,40.833128,Lima,-84.105006,40.739978,10.537613,"Cairo, Egypt",31.235726,30.044388,"Lima, Peru",-77.036526,-12.062107,12422.48129,0.000848
443,Calcutta,-80.569918,40.685178,East Liverpool,-80.577293,40.618676,7.420857,"Calcutta, India",88.363882,22.572672,"Liverpool, United Kingdom",-2.99168,53.407199,8098.564341,0.000916
548,Cheviot,-84.613279,39.157003,Delhi,-84.605222,39.095059,6.922749,"Cheviot Hills, United Kingdom",-2.108786,52.570844,"Delhi, India",77.221939,28.651718,6821.454554,0.001015
193,Belfort,-81.245385,40.812281,Canton,-81.374951,40.798546,11.011596,"Belfort, France",6.862894,47.63796,"Guangzhou, China",113.259294,23.130196,9271.06186,0.001188
1097,Parral,-81.497062,40.561174,Winesburg,-81.695128,40.61645,17.818557,"Parral, Mexico",-93.002907,16.36923,"Weinsberg, Germany",9.286547,49.150304,9499.85,0.001876
497,Canton,-81.374951,40.798546,Parral,-81.497062,40.561174,28.332053,"Guangzhou, China",113.259294,23.130196,"Parral, Mexico",-93.002907,16.36923,14776.360501,0.001917
467,Calcutta,-80.569918,40.685178,Toronto,-80.600906,40.464233,24.70692,"Calcutta, India",88.363882,22.572672,"Toronto, Ontario, Canada",-79.383935,43.653482,12545.515696,0.001969
448,Calcutta,-80.569918,40.685178,Lisbon,-80.768134,40.772004,19.292165,"Calcutta, India",88.363882,22.572672,"Lisbon, Portugal",-9.136592,38.707751,9074.141535,0.002126
310,Bexley,-82.936864,39.969238,Truro Township,-82.806986,39.955209,11.178507,"Bexley, United Kingdom",0.150488,51.441679,"Truro, Nova Scotia, Canada",-63.300006,45.366668,4583.280994,0.002439
