In [1]:
import pandas as pd
import math

In [2]:
def getDistanceFromLatLonInKm(lat1,lon1,lat2,lon2):
  """Return the distance in kilometres between two latitude/longitude points.

  Uses the haversine formula on a sphere of radius 6371 km.

  Args:
    lat1: Latitude of the first point, in degrees.
    lon1: Longitude of the first point, in degrees.
    lat2: Latitude of the second point, in degrees.
    lon2: Longitude of the second point, in degrees.

  Returns:
    The distance between the two points in kilometres, as a float.
  """
  R = 6371  # Radius of the earth in km
  sin_half_dlat = math.sin(math.radians(lat2 - lat1) / 2)
  sin_half_dlon = math.sin(math.radians(lon2 - lon1) / 2)
  a = (sin_half_dlat * sin_half_dlat
       + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * sin_half_dlon * sin_half_dlon)
  # Mathematically a <= 1, but floating-point rounding can overshoot slightly for
  # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
  # Only the upper bound is clamped so NaN inputs still propagate as NaN.
  if a > 1.0:
    a = 1.0
  c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
  return R * c  # Distance in km

def deg2rad(deg):
  """Convert an angle from degrees to radians."""
  radians_per_degree = math.pi / 180
  return deg * radians_per_degree

In [3]:
# Spot-check the distance helper on one pair of coordinates (degrees in, km out).
getDistanceFromLatLonInKm(lat1=40.6943, lon1=-73.9249, lat2=44.9635, lon2=-93.2678)

1642.6907710110397

In [4]:
# Load the cities table from a CSV file at a path relative to the working directory.
data_cities = pd.read_csv('data/uscities.csv')

In [5]:
# City names to keep when filtering the cities table.
cities = [
    'Atlanta', 'Boston', 'Chicago', 'Charlotte',
    'Dallas-Fort Worth', 'Denver', 'Detroit', 'Houston',
    'Las Vegas', 'Los Angeles', 'Miami', 'Minneapolis',
    'Orlando', 'Philadelphia', 'Phoenix', 'San Francisco',
    'New York', 'Seattle', 'Newark',
]

In [6]:
# Overwrite the single cell at row position 4, column position 0 so its value
# matches the 'Dallas-Fort Worth' entry in `cities`.
# NOTE(review): purely positional — presumably this is the city-name column of the
# Dallas row; it depends on the CSV's row and column order, so confirm against the file.
data_cities.iat[4, 0] = 'Dallas-Fort Worth'

In [7]:
# Remove the columns that are not used further on; all other columns are kept.
data_cities = data_cities.drop(
    columns=[
        'city_ascii', 'state_id', 'state_name', 'county_fips', 'county_name',
        'source', 'military', 'incorporated', 'zips', 'id', 'timezone', 'ranking',
    ]
)

In [8]:
# Keep only the rows whose city name appears in the `cities` list.
data_filtered = data_cities.loc[data_cities['city'].isin(cities)]

In [9]:
# Keep only rows with a population above 100000.
# NOTE(review): presumably this removes small towns that share a name with a
# listed city — confirm the threshold is intended.
data_filtered = data_filtered.loc[data_filtered['population'].gt(100000)]

In [10]:
# Show the filtered cities table as this cell's output.
data_filtered

Unnamed: 0,city,lat,lng,population,density
0,New York,40.6943,-73.9249,18713220,10715.0
1,Los Angeles,34.1139,-118.4068,12750807,3276.0
2,Chicago,41.8373,-87.6862,8604203,4574.0
3,Miami,25.7839,-80.2102,6445545,5019.0
4,Dallas-Fort Worth,32.7936,-96.7662,5743938,1526.0
5,Philadelphia,40.0077,-75.1339,5649300,4554.0
6,Houston,29.7863,-95.3889,5464251,1399.0
7,Atlanta,33.7627,-84.4224,5449398,1441.0
9,Boston,42.3188,-71.0846,4688346,5532.0
10,Phoenix,33.5722,-112.0891,4219697,1253.0


In [11]:
# Add a constant 'key' column so that a merge on 'key' pairs every row with every row.
# data_filtered was produced by boolean-mask filtering, so assigning a column on it
# in place can trigger pandas' SettingWithCopyWarning; .assign() returns a new frame.
data_filtered = data_filtered.assign(key='xyz')

In [14]:
# Self-merge on the constant 'key' column to pair every city with every city
# (including itself); the left side gets '_departure' and the right side '_arrival'
# column suffixes, and the helper 'key' column is removed afterwards.
cross_df = (
    data_filtered[['city', 'lat', 'lng', 'population', 'density', 'key']]
    .merge(
        data_filtered[['city', 'lat', 'lng', 'population', 'density', 'key']],
        on='key',
        suffixes=('_departure', '_arrival'),
    )
    .drop(columns='key')
)

In [15]:
# Show the departure/arrival pairs table as this cell's output.
cross_df

Unnamed: 0,city_departure,lat_departure,lng_departure,population_departure,density_departure,city_arrival,lat_arrival,lng_arrival,population_arrival,density_arrival
0,New York,40.6943,-73.9249,18713220,10715.0,New York,40.6943,-73.9249,18713220,10715.0
1,New York,40.6943,-73.9249,18713220,10715.0,Los Angeles,34.1139,-118.4068,12750807,3276.0
2,New York,40.6943,-73.9249,18713220,10715.0,Chicago,41.8373,-87.6862,8604203,4574.0
3,New York,40.6943,-73.9249,18713220,10715.0,Miami,25.7839,-80.2102,6445545,5019.0
4,New York,40.6943,-73.9249,18713220,10715.0,Dallas-Fort Worth,32.7936,-96.7662,5743938,1526.0
...,...,...,...,...,...,...,...,...,...,...
356,Newark,40.7245,-74.1725,282011,4509.0,Denver,39.7621,-104.8759,2876625,1831.0
357,Newark,40.7245,-74.1725,282011,4509.0,Las Vegas,36.2333,-115.2654,2104198,1773.0
358,Newark,40.7245,-74.1725,282011,4509.0,Orlando,28.4772,-81.3369,1822394,1003.0
359,Newark,40.7245,-74.1725,282011,4509.0,Charlotte,35.2080,-80.8304,1512923,1113.0


In [18]:
# Distance in km for every departure/arrival pair, computed one pair at a time
# from the four coordinate columns.
cross_df['distance'] = [
    getDistanceFromLatLonInKm(dep_lat, dep_lng, arr_lat, arr_lng)
    for dep_lat, dep_lng, arr_lat, arr_lng in zip(
        cross_df['lat_departure'],
        cross_df['lng_departure'],
        cross_df['lat_arrival'],
        cross_df['lng_arrival'],
    )
]

In [19]:
# Show the pairs table again, now including the 'distance' column.
cross_df

Unnamed: 0,city_departure,lat_departure,lng_departure,population_departure,density_departure,city_arrival,lat_arrival,lng_arrival,population_arrival,density_arrival,distance
0,New York,40.6943,-73.9249,18713220,10715.0,New York,40.6943,-73.9249,18713220,10715.0,0.000000
1,New York,40.6943,-73.9249,18713220,10715.0,Los Angeles,34.1139,-118.4068,12750807,3276.0,3953.628213
2,New York,40.6943,-73.9249,18713220,10715.0,Chicago,41.8373,-87.6862,8604203,4574.0,1155.916177
3,New York,40.6943,-73.9249,18713220,10715.0,Miami,25.7839,-80.2102,6445545,5019.0,1756.700669
4,New York,40.6943,-73.9249,18713220,10715.0,Dallas-Fort Worth,32.7936,-96.7662,5743938,1526.0,2208.283446
...,...,...,...,...,...,...,...,...,...,...,...
356,Newark,40.7245,-74.1725,282011,4509.0,Denver,39.7621,-104.8759,2876625,1831.0,2594.877861
357,Newark,40.7245,-74.1725,282011,4509.0,Las Vegas,36.2333,-115.2654,2104198,1773.0,3578.807900
358,Newark,40.7245,-74.1725,282011,4509.0,Orlando,28.4772,-81.3369,1822394,1003.0,1510.109592
359,Newark,40.7245,-74.1725,282011,4509.0,Charlotte,35.2080,-80.8304,1512923,1113.0,846.184556
