In [33]:
import pandas as pd
from difflib import get_close_matches
import warnings
from math import sin, cos, sqrt, atan2, radians

In [5]:
# Source data: https://simplemaps.com/data/us-cities
# Every use of this tool must include a reference to that source.
CITY_DATA_FILE = 'uscities.csv'  # relative path: resolved against the current working directory
city_df = pd.read_csv(CITY_DATA_FILE)

In [80]:
# Use difflib's get_close_matches to fuzzy-search the city names:
# ask for up to 10 names from the 'city' column that resemble 'podunk'.
get_close_matches('podunk', city_df['city'], n=10)

['Funk']

In [87]:
# Fuzzy city-name lookup: return every row whose name is a close match
def get_closests_cities(a_city, n=10, cities=None):
    '''
    Return the rows for the n closest city names to a_city as a dataframe.

    Parameters
    ----------
    a_city : str
        City name to search for. Matching is approximate and
        case-sensitive (difflib.get_close_matches, default cutoff).
    n : int, default 10
        Maximum number of distinct city names to match. Several rows can
        share one name, so the result may hold more than n rows.
    cities : pandas.DataFrame, optional
        Table to search; it must have a 'city' column. Defaults to the
        module-level city_df.

    Returns
    -------
    pandas.DataFrame
        All rows whose 'city' is one of the matched names, in the table's
        own row order (not ranked by closeness). Empty when nothing is
        close enough.
    '''
    if cities is None:
        cities = city_df
    # Match against distinct names only: a name that appears on many rows
    # would otherwise occupy several of the n slots by itself and crowd
    # out other candidates. Missing names are dropped because difflib can
    # only compare strings.
    candidate_names = list(cities['city'].dropna().unique())
    matched_names = get_close_matches(a_city, candidate_names, n=n)
    return cities.loc[cities['city'].isin(matched_names)]

In [88]:
# Example: fuzzy lookup around the name 'Funk' with the default n=10
get_closests_cities('Funk')

Unnamed: 0,city,city_ascii,state_id,state_name,county_fips,county_name,county_fips_all,county_name_all,lat,lng,population,density,source,military,incorporated,timezone,ranking,zips,id
1800,Frank,Frank,WV,West Virginia,54075,Pocahontas,54075,Pocahontas,38.5462,-79.808,65.0,65.0,polygon,False,False,America/New_York,3,26264 24920,1840025652
8197,Funk,Funk,NE,Nebraska,31137,Phelps,31137,Phelps,40.463,-99.2508,185.0,271.0,polygon,False,True,America/Chicago,3,68940,1840012078
9958,Hunker,Hunker,PA,Pennsylvania,42129,Westmoreland,42129,Westmoreland,40.2041,-79.6155,280.0,427.0,polygon,False,True,America/New_York,3,15639,1840001138
15417,Fouke,Fouke,AR,Arkansas,5091,Miller,5091,Miller,33.2622,-93.8852,867.0,246.0,polygon,False,True,America/Chicago,3,71837,1840013752
20169,Funkstown,Funkstown,MD,Maryland,24043,Washington,24043,Washington,39.6089,-77.7082,877.0,921.0,polygon,False,True,America/New_York,3,21734 21740,1840005609
22228,Kunkle,Kunkle,OH,Ohio,39171,Williams,39171,Williams,41.6359,-84.4942,287.0,395.0,polygon,False,False,America/New_York,3,43531 43501,1840026276
24398,Funkley,Funkley,MN,Minnesota,27007,Beltrami,27007,Beltrami,47.786,-94.4266,5.0,2.0,polygon,False,True,America/Chicago,3,56630,1840007698
25842,Susank,Susank,KS,Kansas,20009,Barton,20009,Barton,38.6407,-98.7745,32.0,124.0,polygon,False,True,America/Chicago,3,67544,1840009651
26048,Falun,Falun,KS,Kansas,20169,Saline,20169,Saline,38.6747,-97.751,80.0,27.0,polygon,False,False,America/Chicago,3,67442,1840024598
27217,Furman,Furman,SC,South Carolina,45049,Hampton,45049,Hampton,32.6812,-81.1877,219.0,27.0,polygon,False,True,America/New_York,3,29921 29918,1840016925


In [89]:
def get_single_city(a_city, a_state):
    '''
    Look up one city in city_df by exact 'city' name and 'state_id'.

    Returns the first matching row as a Series; the row's original
    index label is kept under 'index'. Returns None when nothing
    matches. A warning is issued when there is no match or when more
    than one row matches.
    '''
    name_matches = city_df['city'] == a_city
    state_matches = city_df['state_id'] == a_state
    matches = city_df.loc[name_matches & state_matches].reset_index()
    n_matches = len(matches)

    if n_matches > 1:
        warnings.warn('\nMultiple matches found, returning first match.')
    elif n_matches == 0:
        warnings.warn('\nNo matches found. Run get_closests_cities to find cities.')
        return None
    return matches.iloc[0]

In [90]:
# Example: exact lookup of one city by name and state_id
get_single_city('Frank', 'WV')

index                          1800
city                          Frank
city_ascii                    Frank
state_id                         WV
state_name            West Virginia
county_fips                   54075
county_name              Pocahontas
county_fips_all               54075
county_name_all          Pocahontas
lat                         38.5462
lng                         -79.808
population                       65
density                          65
source                      polygon
military                      False
incorporated                  False
timezone           America/New_York
ranking                           3
zips                    26264 24920
id                       1840025652
Name: 0, dtype: object

In [91]:
def get_city_coords(a_city, a_state):
    '''
    Get lat and lng for a single city.

    Parameters
    ----------
    a_city : str
        City name, passed unchanged to get_single_city.
    a_state : str
        State identifier, passed unchanged to get_single_city.

    Returns
    -------
    tuple of float
        (lat, lng) taken from the matched row.

    Raises
    ------
    ValueError
        If get_single_city finds no matching row. Without this check the
        failure would be an opaque "'NoneType' object is not
        subscriptable" error.
    '''
    city_row = get_single_city(a_city, a_state)
    if city_row is None:
        raise ValueError(
            f'No city {a_city!r} found in state {a_state!r}. '
            'Run get_closests_cities to find cities.'
        )
    # Return plain floats so callers do not depend on the row's dtype.
    return float(city_row['lat']), float(city_row['lng'])

In [94]:
# Example: (lat, lng) pair for a single city
get_city_coords('Funkley', 'MN')

(47.786, -94.4266)

In [95]:
import math

def distance(origin, destination, radius=6371):
    '''
    Return the great-circle (haversine) distance between two sets of coords.

    Parameters
    ----------
    origin, destination : tuple of (lat, lon)
        Coordinates in decimal degrees.
    radius : float, default 6371
        Radius of the sphere. The result is in the same unit as radius,
        so the default gives kilometres.

    Returns
    -------
    float
        Distance along the surface of the sphere. NaN inputs give NaN.
    '''
    lat1, lon1 = origin
    lat2, lon2 = destination

    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    sin_half_dlat = math.sin(dlat / 2)
    sin_half_dlon = math.sin(dlon / 2)

    # Haversine term: mathematically within [0, 1] for valid latitudes.
    a = sin_half_dlat * sin_half_dlat + math.cos(math.radians(lat1)) \
        * math.cos(math.radians(lat2)) * sin_half_dlon * sin_half_dlon
    # Rounding can push a just above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) fail with a math domain error. A comparison
    # is used instead of min() so that NaN still propagates.
    if a > 1.0:
        a = 1.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return radius * c

In [96]:
def city_distance(city_1, state_1, city_2, state_2):
    '''
    Distance in KM between two cities, each given as a city name
    plus its state.
    '''
    start_coords = get_city_coords(city_1, state_1)
    end_coords = get_city_coords(city_2, state_2)
    return distance(start_coords, end_coords)

In [97]:
# Example: distance in KM between two cities
city_distance('Funkley', 'MN', 'Kunkle', 'OH')

1039.5226490891246