In [1]:
import pandas as pd, numpy as np
from choicemodels.tools import distancematrix as dm

# input: CSV of census tract centroids (read below);
# output: CSV that the computed distance matrix is saved to at the end
tract_centroids_file, distance_matrix_file = (
    '../data/bay_tract_centroids.csv',
    '../data/bay_tracts_distance_matrix.csv',
)

  from pandas.core import datetools


## Load the data

In [2]:
# parse the coordinates as 64-bit floats to keep enough significant digits,
# and read the tract ID as text rather than letting pandas infer its type
dtypes = {
    'GEOID10': str,
    'lat': np.float64,
    'lng': np.float64,
}
df = pd.read_csv(tract_centroids_file, dtype=dtypes)
df = df.sort_values(by='GEOID10')
len(df)

1588

In [3]:
# alternatively, create a randomized dataframe of length n to test performance relative to size
#n = 10000
#df = pd.DataFrame({'GEOID10':range(n), 'lng':np.random.random(n), 'lat':np.random.random(n)})

In [4]:
# index the dataframe by place identifier (i.e., census tract ID).
# The guard keeps this cell idempotent: after the first run 'GEOID10' is the
# index rather than a column, so calling set_index again would raise KeyError.
if df.index.name != 'GEOID10':
    df = df.set_index('GEOID10')

## Calculate distance matrices

First, compute the Euclidean distance matrix, in units of degrees.

In [5]:
%%time
# %%time must stay on the first line of the cell; it reports the cell's run time.
# Build the pairwise distance matrix over every row of df with the
# 'euclidean' method (one row and one column per tract in the index).
df_eu_dm = dm.distance_matrix(df, method='euclidean')

Wall time: 34 ms


In [6]:
# report the matrix dimensions and total element count on separate lines,
# then display the top-left 5x5 corner as a sanity check
print(df_eu_dm.shape, df_eu_dm.size, sep='\n')
df_eu_dm.iloc[0:5, 0:5]

(1588, 1588)
2521744


Unnamed: 0,06001400100,06001400200,06001400300,06001400400,06001400500
6001400100,0.0,0.026261,0.035165,0.032078,0.03798
6001400200,0.026261,0.0,0.008998,0.007887,0.015172
6001400300,0.035165,0.008998,0.0,0.008192,0.012998
6001400400,0.032078,0.007887,0.008192,0.0,0.007288
6001400500,0.03798,0.015172,0.012998,0.007288,0.0


Next, compute the great-circle distance matrix, in units of meters.

In [7]:
%%time
# %%time must stay on the first line of the cell; it reports the cell's run time.
# Build the pairwise distance matrix over every row of df with the
# 'greatcircle' method.
# NOTE(review): the recorded output for this cell includes the line
# `arc = np.arccos(cos)`, which looks like the tail of a runtime warning --
# presumably floating-point round-off pushing a cosine just outside [-1, 1];
# confirm it is benign.
df_gc_dm = dm.distance_matrix(df, method='greatcircle')

  arc = np.arccos(cos)


Wall time: 1.47 s


In [8]:
# report the matrix dimensions and total element count on separate lines,
# then display the top-left 5x5 corner as a sanity check
print(df_gc_dm.shape, df_gc_dm.size, sep='\n')
df_gc_dm.iloc[0:5, 0:5]

(1588, 1588)
2521744


Unnamed: 0,06001400100,06001400200,06001400300,06001400400,06001400500
6001400100,0,2659,3595,3111,3579
6001400200,2659,0,944,692,1332
6001400300,3595,944,0,887,1262
6001400400,3111,692,887,0,640
6001400500,3579,1332,1262,640,0


In [9]:
# persist the great-circle distance matrix as a UTF-8 CSV,
# keeping the row labels (index=True) so rows stay identifiable
df_gc_dm.to_csv(
    distance_matrix_file,
    encoding='utf_8',
    index=True,
)