This notebook is used to prepare amenity data (grocery stores) into distance data that can be used for clustering and visualization.



---

### Imports

In [1]:
import os
import json
import pickle
from collections import defaultdict
import gzip

import pandas as pd
import numpy as np
import scipy.spatial as spatial

import geopandas as gp
from shapely.geometry import Point, Polygon

from IPython.display import display

### Define working directory

In [2]:
ROOT = '/media/school/project/amenities'

### Read Gazetteer table

GEOIDS and lat/long for tract centers are pulled from this table.

In [3]:
gaz = pd.read_pickle(os.path.join(ROOT, '2018_5yr_cendatagov_GAZ_v3.pkl'))
gaz.GEOID = gaz.GEOID.astype(int)
gaz.columns = [x.strip() for x in gaz.columns]
gaz.head(1)

Unnamed: 0,USPS,GEOID,ALAND,AWATER,ALAND_SQMI,AWATER_SQMI,INTPTLAT,INTPTLONG
0,AL,1001020100,9817813,28435,3.791,0.011,32.481959,-86.491338


### Read Geopandas dataframe

This is not really used for processing but gives some evidence of sanity-checks.

In [4]:
with gzip.GzipFile(os.path.join(ROOT, 'all_census_tract_shapes.json.gz'), 'r') as f:
    TRACT_ALL = json.loads(f.read().decode('utf-8'))
gpdf = gp.GeoDataFrame.from_features(TRACT_ALL['features'])
gpdf.GEOID = gpdf.GEOID.astype(int)
gpdf.head(1)

Unnamed: 0,geometry,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER
0,"POLYGON ((-93.16468 30.21663, -93.16392 30.216...",22,19,980000,1400000US22019980000,22019980000,9800,CT,5398742,2339


### Read Amenities Data

In [5]:
AMENITIES = {}
amenity_src_path = '../../amenities/source-data'

for amenity_file in os.listdir(amenity_src_path):
    print(f'Reading {amenity_file}')
    name = amenity_file.split('.')[0].strip().replace('-', '_').split('_')[0].upper()
    data = pd.read_csv(os.path.join(amenity_src_path, amenity_file))
    AMENITIES[name] = data
    
print(AMENITIES.keys(), end='\n\n')
for k, v in AMENITIES.items():
    print(f'{k} shape: {v.shape}')

Reading grocery-stores.csv
Reading gyms.csv
dict_keys(['GROCERY', 'GYMS'])

GROCERY shape: (53518, 11)
GYMS shape: (49348, 11)


### Prepare the first fiew fields of the `result` table.

This will be used to aggregate results throughout the code.

In [6]:
# result with start off with the gaz GEOID and lat/long columns
result = gaz[['GEOID', 'INTPTLAT', 'INTPTLONG']]
result.head()

Unnamed: 0,GEOID,INTPTLAT,INTPTLONG
0,1001020100,32.481959,-86.491338
1,1001020200,32.475758,-86.472468
2,1001020300,32.474024,-86.459703
3,1001020400,32.47103,-86.444835
4,1001020500,32.458922,-86.421826


The following is a test to see if all geoids in gaz have corresponding geometries in gpdf.

219 in gaz are not in gpdf. These are tracts that correspond to waterways (bays, lakes, etc). A few were spot checked, for the rest, it was confirmed that ALAND was 0 in 218 records. The last record has ALAND > 0, but corresponds to tampa bay.

In [7]:
if not os.path.exists(os.path.join(ROOT, 'gaz_to_gpdf_geoid_mapping.pickle')):
    gaz_to_gpdf_geoid_mapping = {}
    for i, row in result.iterrows():
        print('\r{} '.format(i+1), end='')
        geoid = row.GEOID
        lat = float(row.INTPTLAT)
        lon = float(row.INTPTLONG)
        pt = Point(lon, lat) # order here is important!!

        # set default mapping when point not found within geometry
        gaz_to_gpdf_geoid_mapping[geoid] = None

        # if there is a matching geoid, set that
        gpdf_geoid_match = gpdf[gpdf.GEOID.isin([geoid])]
        if gpdf_geoid_match.shape[0] == 1:
            gaz_to_gpdf_geoid_mapping[geoid] = gpdf_geoid_match.GEOID.values[0]
            print('FOUND')
        # otherwise, find if it falls in a geometry
        else:
            for j, row_gpdf in gpdf.iterrows():          
                poly = row_gpdf.geometry
                found_flag = None
                if pt.within(poly):
                    print('FOUND, extra processing')
                    found_flag = True
                    gaz_to_gpdf_geoid_mapping[geoid] = row_gpdf.GEOID
                    break
            if not found_flag:
                print('===NOT FOUND===')
    with open(os.path.join(ROOT, 'gaz_to_gpdf_geoid_mapping.pickle'), 'wb') as f:
        pickle.dump(gaz_to_gpdf_geoid_mapping, f, protocol=4)
else:
    with open(os.path.join(ROOT, 'gaz_to_gpdf_geoid_mapping.pickle'), 'rb') as handle:
        gaz_to_gpdf_geoid_mapping = pickle.load(handle)    

In [8]:
len([x for x,y in gaz_to_gpdf_geoid_mapping.items() if y is not None])

72837

In [9]:
len([x for x,y in gaz_to_gpdf_geoid_mapping.items() if y is None])

219

In [10]:
gaz_geo_set = set(gaz.GEOID)
gpdf_geo_set = set(gpdf.GEOID)
print('Number of gaz geoids in gpdf: {}'.format(
    len(gaz_geo_set.intersection(gpdf_geo_set))))
print('Number of gaz geoids NOT in gpdf: {}'.format(
    gaz.shape[0] - len(gaz_geo_set.intersection(gpdf_geo_set))))

Number of gaz geoids in gpdf: 72837
Number of gaz geoids NOT in gpdf: 219


Given that these are equivalent, we can say it is likely that there are no geometries for the bays, lakes, etc that fall in the 219.

### For each business in each amenity, identify the census tract geoid in which it falls.

In [11]:
def find_within_census_tract(df):
    geoid_matches = []

    # create a dict of subsets of a merged dataframe for faster processing below
    gaz_gpdf = gaz.merge(gpdf, how='left', on='GEOID')
    gaz_gpdf_subset_dict = {}
    for state in gaz_gpdf.USPS.unique():
        gaz_gpdf_subset_dict[state] = gaz_gpdf[gaz_gpdf.USPS==state]

    for i, row in df.iterrows():
        print('\r{} '.format(i+1, end=''))
        pt = Point(row.lon_cleaned, row.lat_cleaned)
        state = row.state

        # subset to state to reduce search space
        gaz_gpdf_subset = gaz_gpdf_subset_dict[state]
        geoid_match = None # set default as None for no match
        # iterate through geometries
        for j, row_j in gaz_gpdf_subset.iterrows():
            # update geoid_match if found
            poly = row_j.geometry
            if not poly:
                continue
            if pt.within(poly):
                print('FOUND')
                geoid_match = row_j.GEOID
                break

        # append geoid_match
        geoid_matches.append(geoid_match)

        if not geoid_match:
            print('===NOT FOUND===')
    
    return geoid_matches


for name, df in AMENITIES.items():
    temp_name = f'{name}.stage1.pkl'
    temp_path = os.path.join(ROOT, temp_name)
    if os.path.exists(temp_path):
        print(f'Loading existing data at: {temp_path}')
        df = pd.read_pickle(temp_path)
        display(df.head())
        AMENITIES[name] = df
    else:
        print(f'Working on {name} amenity data. Finding GEOID in which each store falls.')
        geoid_matches = find_within_census_tract(df)
        df[f'{name}_GEOID_MATCH'] = geoid_matches
        df.to_pickle(os.path.join(ROOT, temp_name), protocol=4)        

Loading existing data at: /media/school/project/amenities/GROCERY.stage1.pkl


Unnamed: 0,zip,city,state,name,address,address_combo,postalCode,lat,lat_cleaned,lon,lon_cleaned,GROCERY_GEOID_MATCH
0,60415,Chicago Ridge,IL,ALDI,:arlem & Southwest Hwy,":arlem & Southwest Hwy,60415",60415,41.700313,41.70031,-87.797054,-87.79705,17031820000.0
1,40219,Louisville,KY,B & E Salvage Store,.,".,40219",40219,38.128021,38.12802,-85.679443,-85.67944,21111010000.0
2,79106,Amarillo,TX,Butch,.,".,79109",79109,35.190903,35.1909,-101.845847,-101.84585,48375010000.0
3,60901,Kankakee,IL,Jewl,...,"...,60914",60914,41.186157,41.18616,-87.893356,-87.89336,17091010000.0
4,54017,New Richmond,WI,Indomaret,@ Fresh market,"@ Fresh market,54017",54017,45.119296,45.1193,-92.537666,-92.53767,55109120000.0


Loading existing data at: /media/school/project/amenities/GYMS.stage1.pkl


Unnamed: 0,zip,city,state,name,address,address_combo,postalCode,lat,lat_cleaned,lon,lon_cleaned,GYMS_GEOID_MATCH
0,30720,Dalton,GA,First Presbyterian Church Rec Room,?,"?,30720",30720,34.794614,34.794614,-84.997768,-84.997768,13313000000.0
1,30084,Tucker,GA,The Gym,.,".,30345",30345,33.850755,33.850755,-84.247993,-84.247993,13089020000.0
2,32826,Orlando,FL,Sick Fitness,.,".,32826",32826,28.581695,28.581695,-81.202183,-81.202183,12095020000.0
3,49506,Grand Rapids,MI,Stairstepper,.,".,49546",49546,42.925879,42.925879,-85.605882,-85.605882,26081000000.0
4,60104,Bellwood,IL,Boys And Girls Club,.,".,60104",60104,41.880768,41.880768,-87.88269,-87.88269,17031820000.0


In [12]:
for name, df in AMENITIES.items():
    print(f'\n\n{name}')
    display(df.head(3))



GROCERY


Unnamed: 0,zip,city,state,name,address,address_combo,postalCode,lat,lat_cleaned,lon,lon_cleaned,GROCERY_GEOID_MATCH
0,60415,Chicago Ridge,IL,ALDI,:arlem & Southwest Hwy,":arlem & Southwest Hwy,60415",60415,41.700313,41.70031,-87.797054,-87.79705,17031820000.0
1,40219,Louisville,KY,B & E Salvage Store,.,".,40219",40219,38.128021,38.12802,-85.679443,-85.67944,21111010000.0
2,79106,Amarillo,TX,Butch,.,".,79109",79109,35.190903,35.1909,-101.845847,-101.84585,48375010000.0




GYMS


Unnamed: 0,zip,city,state,name,address,address_combo,postalCode,lat,lat_cleaned,lon,lon_cleaned,GYMS_GEOID_MATCH
0,30720,Dalton,GA,First Presbyterian Church Rec Room,?,"?,30720",30720,34.794614,34.794614,-84.997768,-84.997768,13313000000.0
1,30084,Tucker,GA,The Gym,.,".,30345",30345,33.850755,33.850755,-84.247993,-84.247993,13089020000.0
2,32826,Orlando,FL,Sick Fitness,.,".,32826",32826,28.581695,28.581695,-81.202183,-81.202183,12095020000.0


### Find count of each amenity within each tract

In [13]:
def within_tract_counts(name, df):
    within_geoid_counts = df.groupby(f'{name}_GEOID_MATCH')[f'{name}_GEOID_MATCH'].count()
    print(within_geoid_counts.min(), within_geoid_counts.max())
    within_geoid_counts.name = f'N_{name}_WITHIN_TRACT'
    within_geoid_counts = within_geoid_counts.reset_index()
    return within_geoid_counts

AMENITY_WITHIN_TRACT_COUNTS = {}
for name, df in AMENITIES.items():
    AMENITY_WITHIN_TRACT_COUNTS[name] = within_tract_counts(name, df)
    print(f'\n\n{name}')
    display(AMENITY_WITHIN_TRACT_COUNTS[name].head(3))

1 17


GROCERY


Unnamed: 0,GROCERY_GEOID_MATCH,N_GROCERY_WITHIN_TRACT
0,1001020000.0,1
1,1001020000.0,3
2,1001021000.0,1


1 26


GYMS


Unnamed: 0,GYMS_GEOID_MATCH,N_GYMS_WITHIN_TRACT
0,1001020000.0,2
1,1001020000.0,3
2,1001021000.0,1


Append the results and perform check.

In [14]:
for name, df in AMENITY_WITHIN_TRACT_COUNTS.items():
    result = result.merge(df, how='left', left_on='GEOID', right_on=f'{name}_GEOID_MATCH')
result.head()

Unnamed: 0,GEOID,INTPTLAT,INTPTLONG,GROCERY_GEOID_MATCH,N_GROCERY_WITHIN_TRACT,GYMS_GEOID_MATCH,N_GYMS_WITHIN_TRACT
0,1001020100,32.481959,-86.491338,,,,
1,1001020200,32.475758,-86.472468,,,,
2,1001020300,32.474024,-86.459703,1001020000.0,1.0,1001020000.0,2.0
3,1001020400,32.47103,-86.444835,,,,
4,1001020500,32.458922,-86.421826,1001020000.0,3.0,1001020000.0,3.0


Check if number of unique geoids same in each table.

In [15]:
for name, df in AMENITY_WITHIN_TRACT_COUNTS.items():
    print(result[f'{name}_GEOID_MATCH'].nunique() ==\
          AMENITY_WITHIN_TRACT_COUNTS[name][f'{name}_GEOID_MATCH'].nunique())

True
True


Fill missing data with 0 (i.e. no stores found within that tract).

Drop the GEOID_MATCH field. No longer needed.

In [16]:
for col in [x for x in result.columns if x.startswith('N_')]:
    result[col] = result[col].fillna(0)
    result = result.drop(columns=['{}_GEOID_MATCH'.format(col.split('_')[1])])
result

Unnamed: 0,GEOID,INTPTLAT,INTPTLONG,N_GROCERY_WITHIN_TRACT,N_GYMS_WITHIN_TRACT
0,1001020100,32.481959,-86.491338,0.0,0.0
1,1001020200,32.475758,-86.472468,0.0,0.0
2,1001020300,32.474024,-86.459703,1.0,2.0
3,1001020400,32.471030,-86.444835,0.0,0.0
4,1001020500,32.458922,-86.421826,3.0,3.0
...,...,...,...,...,...
73051,56043000200,43.878830,-107.669052,0.0,0.0
73052,56043000301,44.014369,-107.956379,0.0,0.0
73053,56043000302,44.028771,-107.950748,0.0,1.0
73054,56045951100,43.846213,-104.570020,0.0,0.0


The following blocks of commented code were a first attempt to calculate stores that fell within a certain radius of a census tract center. This would have taken far too long to compute (~5 days).

In [17]:
### Now, compute the stores within radius of neighborhood centers
# N_GROCERY_WT_2_MI, N_GROCERY_WT_5_MI, N_GROCERY_WT_30_MI 


In [18]:
# from sklearn.metrics.pairwise import haversine_distances
# from math import radians

# # example
# bsas = [-34.83333, -58.5166646]
# paris = [49.0083899664, 2.53844117956]
# bsas_in_radians = [radians(_) for _ in bsas]
# paris_in_radians = [radians(_) for _ in paris]
# r = haversine_distances([bsas_in_radians, paris_in_radians])
# r = (r * 6371000/1000) # multiply by Earth radius to get kilometers
# r = (r * 0.621371)# km to mile
# r

In [19]:
# # Since doing many pairwise calcs, prep all items before the distance calcs are performed
# # will parallelize the distance calcs

# tract_prep = [] # store (i, prepared_location)
# # iterate through each tract
# for i, row in result.iterrows():
#     tract_lat = row.INTPTLAT
#     tract_lon = row.INTPTLONG
#     tract_in_rads = [radians(x) for x in [tract_lat, tract_lon]]
#     tract_prep.append((i, tract_in_rads))

# amen_prep = []
# for j, rowj in gro.iterrows():
#     am_lat = rowj.lat_cleaned
#     am_lon = rowj.lon_cleaned
#     am_in_rads = [radians(x) for x in [am_lat, am_lon]]
#     amen_prep.append((j, am_in_rads))
    
# print(len(tract_prep), len(amen_prep))

In [20]:
# iterator = ((x,y) for x in tract_prep for y in amen_prep) # build iterator
# distances = []
# count = 1.0
# total = len(tract_prep) * len(amen_prep)

# for a, b in iterator:
#     print('{}%'.format(round(count/total*100., 2), end=''))
#     i = a[0]
#     j = b[0]
#     r = haversine_distances([a[1], b[1]])[0][1] * 3958.754641
#     count += 1

### Calculate which stores (and number of stores) fall within some distance of the tract centers.

Create a transformer to project coordinates from latitude/longitude to a 2D plane of the USA.

In [21]:
from pyproj import Transformer
transformer = Transformer.from_crs("epsg:4326", "epsg:2163") # lat/lon to us 2d projection

Project all of the tract locations. Store for use below.

In [22]:
# transform coordinate system
tract_prep = [] # store (i, prepared_location)
# iterate through each tract
for i, row in result.iterrows():
    print('\r{} of {}'.format(i+1, result.shape[0]), end='')
    tract_lat = row.INTPTLAT
    tract_lon = row.INTPTLONG
    x, y = transformer.transform(tract_lat, tract_lon)
    point = [x, y]
    tract_prep.append((i, point))
print('tracts complete\n')

1 of 730562 of 730563 of 730564 of 730565 of 730566 of 730567 of 730568 of 730569 of 7305610 of 7305611 of 7305612 of 7305613 of 7305614 of 7305615 of 7305616 of 7305617 of 7305618 of 7305619 of 7305620 of 7305621 of 7305622 of 7305623 of 7305624 of 7305625 of 7305626 of 7305627 of 7305628 of 7305629 of 7305630 of 7305631 of 7305632 of 7305633 of 7305634 of 7305635 of 7305636 of 7305637 of 7305638 of 7305639 of 7305640 of 7305641 of 7305642 of 7305643 of 7305644 of 7305645 of 7305646 of 7305647 of 7305648 of 7305649 of 7305650 of 7305651 of 7305652 of 7305653 of 7305654 of 7305655 of 7305656 of 7305657 of 7305658 of 7305659 of 7305660 of 7305661 of 7305662 of 7305663 of 7305664 of 7305665 of 7305666 of 7305667 of 7305668 of 7305669 of 7305670 of 7305671 of 7305672 of 7305673 of 7305674 of 7305675 of 7305676 of 7305677 of 7305678 of 7305679 of 7305680 of 7305681 of 7305682 of 7305683 of 7305684 of 73056

741 of 73056742 of 73056743 of 73056744 of 73056745 of 73056746 of 73056747 of 73056748 of 73056749 of 73056750 of 73056751 of 73056752 of 73056753 of 73056754 of 73056755 of 73056756 of 73056757 of 73056758 of 73056759 of 73056760 of 73056761 of 73056762 of 73056763 of 73056764 of 73056765 of 73056766 of 73056767 of 73056768 of 73056769 of 73056770 of 73056771 of 73056772 of 73056773 of 73056774 of 73056775 of 73056776 of 73056777 of 73056778 of 73056779 of 73056780 of 73056781 of 73056782 of 73056783 of 73056784 of 73056785 of 73056786 of 73056787 of 73056788 of 73056789 of 73056790 of 73056791 of 73056792 of 73056793 of 73056794 of 73056795 of 73056796 of 73056797 of 73056798 of 73056799 of 73056800 of 73056801 of 73056802 of 73056803 of 73056804 of 73056805 of 73056806 of 73056807 of 73056808 of 73056809 of 73056810 of 73056811 of 73056812 of 73056813 of 73056814 of 73056815 of 73056816 of 73056817 of 7305

73056 of 73056tracts complete



Project all of the amenities locations for use below.

In [23]:
def amenity_projections(df):
    amen_prep = []
    for j, rowj in df.iterrows():
        print('\r{} of {}'.format(j+1, df.shape[0]), end='')
        am_lat = rowj.lat_cleaned
        am_lon = rowj.lon_cleaned
        x, y = transformer.transform(am_lat, am_lon)
        point = [x, y]
        amen_prep.append((j, point))
    return amen_prep

AMENITIES_PROJ = {}
for name, df in AMENITIES.items():
    print(f'\n\nProjecting {name}')
    AMENITIES_PROJ[name] = amenity_projections(df)
    print(f'\n{name}: {len(AMENITIES_PROJ[name])}')
print(f'Tracts: {len(tract_prep)}')



Projecting GROCERY
53518 of 53518
GROCERY: 53518


Projecting GYMS
49348 of 49348
GYMS: 49348
Tracts: 73056


Build a kd-tree of the projected amenities points.

In [24]:
def amenity_kdtree(amenity_projection_list):
    points = np.array([x[1] for x in amenity_projection_list])
    print(points)
    point_tree = spatial.cKDTree(points)
    return point_tree   

AMENITY_KD_TREES = {}
for name, amenity_projection_list in AMENITIES_PROJ.items():
    print(name)
    AMENITY_KD_TREES[name] = amenity_kdtree(amenity_projection_list)

GROCERY
[[ 1008903.39597079  -291698.51000652]
 [ 1247265.06037619  -656209.377317  ]
 [ -168337.02677753 -1087554.82271786]
 ...
 [ 1686110.01810216  -300867.78168736]
 [  543273.62098321  -671832.77348739]
 [-1913137.92091447  -594763.41046583]]
GYMS
[[ 1366528.49245784 -1011749.44326778]
 [ 1451311.65010533 -1102767.86222991]
 [ 1837002.38342593 -1619951.85824966]
 ...
 [ -599261.13042061 -1452250.1295229 ]
 [-1695010.08223182 -1016198.50409325]
 [ 1227188.97791841  -339644.06235434]]


Iterate through the projected tract coordinates. For each amenity, query its kdtree with these coordinates and a distance. The units produced by the projection are in meters. We specify the number of miles to the miles_to_meters function to convert it to meters.

This code stores a list of amenity indexes that fall within the radius of the census tract center. These lists are later saved to the final 'full' table.

The number in these column names specifies the number of miles that were searched (radius) from the tract centers.

In [25]:
def miles_to_meters(miles):
    meters = miles * 1609.34
    return meters

def meters_to_miles(meters):
    miles = meters / 1609.34
    return miles

def get_distance_results(tract_prep, point_tree):
    # for each tract, get a list of amenity indexes that fall within x miles 
    distance_results = defaultdict(list)
    for result_idx, locationxy in tract_prep:
        for dist in [2,5,10,25,50]: #miles
            point_list = point_tree.query_ball_point(locationxy, miles_to_meters(dist), p=np.inf)
            n_points = len(point_list)
            distance_results['dist{}'.format(dist)].append(point_list)
    return distance_results

AMENITY_DISTANCE_RESULTS = {}
for name, point_tree in AMENITY_KD_TREES.items():
    print(f'{name}')
    AMENITY_DISTANCE_RESULTS[name] = pd.DataFrame(get_distance_results(tract_prep, point_tree))
    display(AMENITY_DISTANCE_RESULTS[name].head(3))

GROCERY


Unnamed: 0,dist2,dist5,dist10,dist25,dist50
0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ..."
1,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,..."
2,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,..."


GYMS


Unnamed: 0,dist2,dist5,dist10,dist25,dist50
0,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399..."
1,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399..."
2,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399..."


Create weighted number for amenities within x distance

Weights go as 1 / distance (radius distance, in miles)

In [26]:
for name, df in AMENITY_DISTANCE_RESULTS.items():
    for col in df.columns:
        weight = 1. / int(col[4:])
        new_col = f'wt_n_{name}_dist_{col[4:]}'.upper()
        df[new_col] =  df[col].apply(lambda x: len(x)) * weight
        AMENITY_DISTANCE_RESULTS[name] = df
        print(name)
        display(AMENITY_DISTANCE_RESULTS[name].head(3))

GROCERY


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GROCERY_DIST_2
0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",0.0
1,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.0
2,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.5


GROCERY


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GROCERY_DIST_2,WT_N_GROCERY_DIST_5
0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",0.0,1.2
1,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.0,1.8
2,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.5,2.0


GROCERY


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GROCERY_DIST_2,WT_N_GROCERY_DIST_5,WT_N_GROCERY_DIST_10
0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",0.0,1.2,1.4
1,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.0,1.8,1.5
2,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.5,2.0,1.5


GROCERY


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GROCERY_DIST_2,WT_N_GROCERY_DIST_5,WT_N_GROCERY_DIST_10,WT_N_GROCERY_DIST_25
0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",0.0,1.2,1.4,2.4
1,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.0,1.8,1.5,2.4
2,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.5,2.0,1.5,2.4


GROCERY


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GROCERY_DIST_2,WT_N_GROCERY_DIST_5,WT_N_GROCERY_DIST_10,WT_N_GROCERY_DIST_25,WT_N_GROCERY_DIST_50
0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",0.0,1.2,1.4,2.4,2.4
1,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.0,1.8,1.5,2.4,2.44
2,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.5,2.0,1.5,2.4,2.44


GYMS


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GYMS_DIST_2
0,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0
1,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5
2,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5


GYMS


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5
0,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0,1.6
1,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,2.8
2,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,3.4


GYMS


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5,WT_N_GYMS_DIST_10
0,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0,1.6,2.0
1,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,2.8,2.1
2,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,3.4,2.6


GYMS


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5,WT_N_GYMS_DIST_10,WT_N_GYMS_DIST_25
0,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0,1.6,2.0,2.8
1,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,2.8,2.1,2.8
2,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,3.4,2.6,2.8


GYMS


Unnamed: 0,dist2,dist5,dist10,dist25,dist50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5,WT_N_GYMS_DIST_10,WT_N_GYMS_DIST_25,WT_N_GYMS_DIST_50
0,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0,1.6,2.0,2.8,1.88
1,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,2.8,2.1,2.8,1.86
2,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,3.4,2.6,2.8,1.86


Update the dist field names.    

In [27]:
for name, df in AMENITY_DISTANCE_RESULTS.items():
    dist_cols = [x for x in df.columns if x.startswith('dist')]
    for col in dist_cols:
        df.rename(columns={col: f'LIST_{name}_DIST_{col[4:]}'}, inplace=True)
    print(name)
    display(AMENITY_DISTANCE_RESULTS[name].head(3))

GROCERY


Unnamed: 0,LIST_GROCERY_DIST_2,LIST_GROCERY_DIST_5,LIST_GROCERY_DIST_10,LIST_GROCERY_DIST_25,LIST_GROCERY_DIST_50,WT_N_GROCERY_DIST_2,WT_N_GROCERY_DIST_5,WT_N_GROCERY_DIST_10,WT_N_GROCERY_DIST_25,WT_N_GROCERY_DIST_50
0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",0.0,1.2,1.4,2.4,2.4
1,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.0,1.8,1.5,2.4,2.44
2,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",1.5,2.0,1.5,2.4,2.44


GYMS


Unnamed: 0,LIST_GYMS_DIST_2,LIST_GYMS_DIST_5,LIST_GYMS_DIST_10,LIST_GYMS_DIST_25,LIST_GYMS_DIST_50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5,WT_N_GYMS_DIST_10,WT_N_GYMS_DIST_25,WT_N_GYMS_DIST_50
0,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0,1.6,2.0,2.8,1.88
1,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,2.8,2.1,2.8,1.86
2,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,3.4,2.6,2.8,1.86


Append the newly calc'd info to the `result` table

In [28]:
for name, df in AMENITY_DISTANCE_RESULTS.items():
    result = pd.concat([result, df], axis=1)
result.head(3)

Unnamed: 0,GEOID,INTPTLAT,INTPTLONG,N_GROCERY_WITHIN_TRACT,N_GYMS_WITHIN_TRACT,LIST_GROCERY_DIST_2,LIST_GROCERY_DIST_5,LIST_GROCERY_DIST_10,LIST_GROCERY_DIST_25,LIST_GROCERY_DIST_50,...,LIST_GYMS_DIST_2,LIST_GYMS_DIST_5,LIST_GYMS_DIST_10,LIST_GYMS_DIST_25,LIST_GYMS_DIST_50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5,WT_N_GYMS_DIST_10,WT_N_GYMS_DIST_25,WT_N_GYMS_DIST_50
0,1001020100,32.481959,-86.491338,0.0,0.0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",...,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0,1.6,2.0,2.8,1.88
1,1001020200,32.475758,-86.472468,0.0,0.0,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",...,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,2.8,2.1,2.8,1.86
2,1001020300,32.474024,-86.459703,1.0,2.0,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",...,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,3.4,2.6,2.8,1.86


Output this full table to a file. This is rather large. It is needed for the visualization but not for clustering.

In [29]:
OUTPATH_ROOT = '../../amenities'
FULL_NAME = 'amenities_full.pkl.gz'

result.to_pickle(os.path.join(OUTPATH_ROOT, FULL_NAME), protocol=4, compression='gzip')

Output a much smaller version (per disk storage) of the table without the list columns. This would be ideal to use for clustering.

In [30]:
FEAT_NAME = 'amenities_features.pkl'
result[[x for x in result.columns if not x.startswith('LIST_')]].to_pickle(
    os.path.join(OUTPATH_ROOT, FEAT_NAME), protocol=4
)

Test read each output

In [31]:
print(FULL_NAME)
display(pd.read_pickle(os.path.join(OUTPATH_ROOT, FULL_NAME), compression='gzip').head())
print(FEAT_NAME)
display(pd.read_pickle(os.path.join(OUTPATH_ROOT, FEAT_NAME)).head())

amenities_full.pkl.gz


Unnamed: 0,GEOID,INTPTLAT,INTPTLONG,N_GROCERY_WITHIN_TRACT,N_GYMS_WITHIN_TRACT,LIST_GROCERY_DIST_2,LIST_GROCERY_DIST_5,LIST_GROCERY_DIST_10,LIST_GROCERY_DIST_25,LIST_GROCERY_DIST_50,...,LIST_GYMS_DIST_2,LIST_GYMS_DIST_5,LIST_GYMS_DIST_10,LIST_GYMS_DIST_25,LIST_GYMS_DIST_50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5,WT_N_GYMS_DIST_10,WT_N_GYMS_DIST_25,WT_N_GYMS_DIST_50
0,1001020100,32.481959,-86.491338,0.0,0.0,[],"[39075, 46917, 39758, 49391, 13067, 13782]","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",...,[],"[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",0.0,1.6,2.0,2.8,1.88
1,1001020200,32.475758,-86.472468,0.0,0.0,"[39075, 39758]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",...,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,2.8,2.1,2.8,1.86
2,1001020300,32.474024,-86.459703,1.0,2.0,"[39075, 39758, 49391]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",...,"[30530, 5376, 8276]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",1.5,3.4,2.6,2.8,1.86
3,1001020400,32.47103,-86.444835,0.0,0.0,"[39075, 39758, 49391, 13067, 13782]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[39075, 46337, 49392, 46917, 39758, 49391, 130...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 42647, 17626, 7434, 32793, 9946, 6963,...",...,"[30530, 5376, 8276, 11421, 13300, 38649, 16841]","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",3.5,3.4,2.8,2.8,1.86
4,1001020500,32.458922,-86.421826,3.0,3.0,"[39075, 39758, 49391, 13067, 13782, 19291]","[39075, 39758, 49391, 13067, 13782, 19291, 886...","[31676, 39075, 46337, 49392, 46917, 39758, 493...","[49388, 31676, 22192, 35804, 49400, 21223, 216...","[49388, 17626, 7434, 32793, 9946, 6963, 9351, ...",...,"[5376, 8276, 11421, 13300, 38649, 16841, 27504...","[30530, 43997, 5376, 8276, 11421, 13300, 38649...","[44002, 30530, 43997, 5376, 8276, 11421, 13300...","[44001, 43994, 44002, 30918, 34752, 18336, 285...","[44001, 3785, 43994, 44002, 30918, 40466, 4399...",4.5,3.8,3.0,2.8,1.82


amenities_features.pkl


Unnamed: 0,GEOID,INTPTLAT,INTPTLONG,N_GROCERY_WITHIN_TRACT,N_GYMS_WITHIN_TRACT,WT_N_GROCERY_DIST_2,WT_N_GROCERY_DIST_5,WT_N_GROCERY_DIST_10,WT_N_GROCERY_DIST_25,WT_N_GROCERY_DIST_50,WT_N_GYMS_DIST_2,WT_N_GYMS_DIST_5,WT_N_GYMS_DIST_10,WT_N_GYMS_DIST_25,WT_N_GYMS_DIST_50
0,1001020100,32.481959,-86.491338,0.0,0.0,0.0,1.2,1.4,2.4,2.4,0.0,1.6,2.0,2.8,1.88
1,1001020200,32.475758,-86.472468,0.0,0.0,1.0,1.8,1.5,2.4,2.44,1.5,2.8,2.1,2.8,1.86
2,1001020300,32.474024,-86.459703,1.0,2.0,1.5,2.0,1.5,2.4,2.44,1.5,3.4,2.6,2.8,1.86
3,1001020400,32.47103,-86.444835,0.0,0.0,2.5,2.0,1.6,2.4,2.44,3.5,3.4,2.8,2.8,1.86
4,1001020500,32.458922,-86.421826,3.0,3.0,3.0,2.0,2.3,2.4,2.4,4.5,3.8,3.0,2.8,1.82
