# Analysis 4 - Stop proximity to Shopping Centers


For the fourth and final analysis, I will run a nearest-neighbor analysis to see how accessible shopping centers in Fairfax County are by transit. For each shopping center, the analysis finds the closest bus stop and the straight-line (great-circle) distance, in feet, between the two.

In [2]:
import glob
import re
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyproj
import seaborn as sns

In [3]:
# Folder that holds the GTFS feed's .txt tables. Every table below is read from
# here, so this is the only line to edit when the data lives somewhere else.
GTFS_DIR = Path(r'C:\Users\hameed\Desktop\Datasets\GTFS Analysis\ffx_GTFS\connector_gtfs')


def read_gtfs_table(table_name, gtfs_dir=GTFS_DIR):
    """Load one GTFS table into a DataFrame.

    Parameters
    ----------
    table_name : str
        File name without the ``.txt`` extension, e.g. ``'stops'``.
    gtfs_dir : str or pathlib.Path, optional
        Folder containing the feed's ``.txt`` files. Defaults to ``GTFS_DIR``.

    Returns
    -------
    pandas.DataFrame
        Parsed contents of ``<gtfs_dir>/<table_name>.txt`` (read with
        ``pd.read_csv`` defaults).
    """
    return pd.read_csv(Path(gtfs_dir) / f'{table_name}.txt')


agency = read_gtfs_table('agency')
calendar = read_gtfs_table('calendar')
calendar_dates = read_gtfs_table('calendar_dates')
fare_attributes = read_gtfs_table('fare_attributes')
frequencies = read_gtfs_table('frequencies')
routes = read_gtfs_table('routes')
shapes = read_gtfs_table('shapes')
stop_times = read_gtfs_table('stop_times')
stops = read_gtfs_table('stops')
timepoint_times = read_gtfs_table('timepoint_times')
timepoints = read_gtfs_table('timepoints')
trips = read_gtfs_table('trips')

Let's import the shopping center data, convert the stops table to a GeoDataFrame, and then set both datasets to the same CRS.

In [10]:
# Shopping-center point locations, read from a GeoJSON file
fp = "../ffx_GTFS/Shopping_Centers.geojson"
shops = gpd.read_file(fp, driver="GeoJSON")

# Preview both inputs (shopping centers first, then bus stops) before combining them
display(shops.head(2), stops.head(2))

Unnamed: 0,OBJECTID_1,DESCRIPTION,JURISDICTION,CreationDate,Creator,EditDate,Editor,geometry
0,1,TOWNCENTER PLAZA,COUNTY OF FAIRFAX,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,POINT (-77.37239 39.01238)
1,2,LAKE ANNE PLAZA,COUNTY OF FAIRFAX,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,POINT (-77.34069 38.96913)


Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
0,3245,6552,Park St and McHenry St,,38.891683,-77.248563,,,,,,1
1,3246,6553,Lee Hwy and Eskirdge Rd (EB),,38.873912,-77.231425,,,,,,1


In [11]:
# Build one point per stop from its longitude (x) and latitude (y) columns,
# then attach those points to the stops table as its geometry
stop_points = gpd.points_from_xy(x=stops['stop_lon'], y=stops['stop_lat'])
stops_geo = gpd.GeoDataFrame(stops, geometry=stop_points)

In [13]:
# Both layers hold longitude/latitude coordinates; label each one as EPSG:4326
# so the two GeoDataFrames share the same CRS. set_crs only assigns the CRS
# label, it does not reproject the coordinates.
shops = shops.set_crs(crs='EPSG:4326')
stops_geo = stops_geo.set_crs(crs='EPSG:4326')

In [14]:
from sklearn.neighbors import BallTree
import numpy as np

In [20]:
def get_nearest(src_points, candidates, k_neighbors=1):
    """Return, for every source point, its single nearest candidate point.

    A BallTree with the haversine metric is built over ``candidates`` and
    queried with ``src_points``. The caller in this notebook passes both as
    (n, 2) arrays of [lat, lon] in radians.

    Parameters
    ----------
    src_points : array-like
        Points to find a neighbour for.
    candidates : array-like
        Points to search among.
    k_neighbors : int, optional
        Number of neighbours requested from the tree. Only the first
        (nearest) one is returned, whatever value is passed.

    Returns
    -------
    tuple
        ``(closest, closest_dist)``: the position within ``candidates`` of the
        nearest point for each source point, and the matching haversine
        distance as reported by the tree (radians on a unit sphere).
    """
    ball_tree = BallTree(candidates, leaf_size=15, metric='haversine')

    # query() returns two arrays of shape (n_sources, k_neighbors)
    dist_matrix, index_matrix = ball_tree.query(src_points, k=k_neighbors)

    # Column 0 holds the nearest neighbour of each source point
    nearest_index = index_matrix[:, 0]
    nearest_dist = dist_matrix[:, 0]

    return (nearest_index, nearest_dist)

def _lat_lon_radians(points):
    """Return an (n, 2) array of [lat, lon] in radians for a GeoSeries of points."""
    # Same arithmetic as the per-point `deg * pi / 180`, applied to whole columns at once
    return np.column_stack([points.y, points.x]) * np.pi / 180


def nearest_neighbor(left_gdf, right_gdf, return_dist=True, earth_radius=20925721.78):
    """For each point in ``left_gdf``, find the closest point in ``right_gdf``.

    Parameters
    ----------
    left_gdf : geopandas.GeoDataFrame
        Source points; one output row is produced per row here. Geometries are
        read as x = longitude, y = latitude in degrees.
    right_gdf : geopandas.GeoDataFrame
        Candidate points to search among, with the same coordinate convention.
        It is not modified.
    return_dist : bool, optional
        When True (default), add a ``distance`` column to the result.
    earth_radius : float, optional
        Sphere radius used to turn the haversine result (radians) into a
        length; ``distance`` is expressed in the same unit. The default,
        20925721.78, is in feet (about 6,378.16 km).

    Returns
    -------
    geopandas.GeoDataFrame
        The rows of ``right_gdf`` nearest to each row of ``left_gdf``, in the
        order of ``left_gdf`` and with a fresh 0..n-1 index (the original
        index of ``left_gdf`` is not carried over). Includes ``distance`` when
        ``return_dist`` is True.
    """
    # Make the right index sequential so the positions returned by the tree
    # can be used directly as row labels
    right = right_gdf.copy().reset_index(drop=True)

    # The haversine metric works on [lat, lon] in radians
    left_radians = _lat_lon_radians(left_gdf.geometry)
    right_radians = _lat_lon_radians(right.geometry)

    # closest ==> position in `right` of the nearest point
    # dist    ==> haversine distance to it, in radians
    closest, dist = get_nearest(src_points=left_radians, candidates=right_radians)

    # Pull the matching rows (a candidate may repeat) and renumber them 0..n-1
    closest_points = right.loc[closest].reset_index(drop=True)

    if return_dist:
        # Arc length = angle (radians) * radius
        closest_points['distance'] = dist * earth_radius

    return closest_points
    

In [21]:
# For every shopping center, look up the nearest bus stop (one result row per shopping center)
closest_stops = nearest_neighbor(shops, stops_geo)

In [22]:
# Preview the matched stops; `distance` is in feet
closest_stops.head()

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding,geometry,distance
0,399,1767,Dranesville Rd and Leesburg Pike,,39.01225,-77.37167,,,,,,1,POINT (-77.37167 39.01225),209.595839
1,1185,2911,N Shore Dr and Lake Anne Village Ctr,,38.9701,-77.3404,,,,,,1,POINT (-77.34040 38.97010),362.705481
2,561,1963,Fox Mill Rd and John Milton Dr,,38.928881,-77.375328,,,,,,1,POINT (-77.37533 38.92888),712.135527
3,1904,4080,Galesbury La and Lee Jackson Hwy,,38.887856,-77.418104,,,,,,1,POINT (-77.41810 38.88786),325.826001
4,1886,4053,Braddock Rd and Braddock Springs Rd,,38.835486,-77.425208,,,,,,1,POINT (-77.42521 38.83549),562.904249


In [24]:
# Attach each shopping center's nearest stop to it. The join is on the index:
# row i of closest_stops is the match for row i of shops.
# Both frames have a `geometry` column, so the shopping center's own geometry
# takes the left suffix and becomes `geometryshop_loc`.
stop_columns = ['stop_id', 'stop_name', 'geometry', 'distance']
closest_stops_merged = shops.join(closest_stops[stop_columns], lsuffix='shop_loc')

In [25]:
# Preview the shopping centers alongside their nearest stop and the distance to it
display(closest_stops_merged.head())

Unnamed: 0,OBJECTID_1,DESCRIPTION,JURISDICTION,CreationDate,Creator,EditDate,Editor,geometryshop_loc,stop_id,stop_name,geometry,distance
0,1,TOWNCENTER PLAZA,COUNTY OF FAIRFAX,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,POINT (-77.37239 39.01238),399,Dranesville Rd and Leesburg Pike,POINT (-77.37167 39.01225),209.595839
1,2,LAKE ANNE PLAZA,COUNTY OF FAIRFAX,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,POINT (-77.34069 38.96913),1185,N Shore Dr and Lake Anne Village Ctr,POINT (-77.34040 38.97010),362.705481
2,5,FOX MILL,COUNTY OF FAIRFAX,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,POINT (-77.37554 38.92694),561,Fox Mill Rd and John Milton Dr,POINT (-77.37533 38.92888),712.135527
3,6,CHANTILLY PLAZA,COUNTY OF FAIRFAX,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,POINT (-77.41916 38.88819),1904,Galesbury La and Lee Jackson Hwy,POINT (-77.41810 38.88786),325.826001
4,7,OLD CENTREVILLE CROSSING,COUNTY OF FAIRFAX,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,2022-02-26T06:03:16.297001+00:00,FairfaxCounty,POINT (-77.42628 38.83678),1886,Braddock Rd and Braddock Springs Rd,POINT (-77.42521 38.83549),562.904249


Let's create categories for the distances which we'll use to plot an interactive map.

In [39]:
# Bin edges in feet (same unit as `distance`). pd.cut builds right-closed bins
# such as (0, 250], so any value outside (0, 10000] is left as NaN.
distance_bin_edges = [0, 250, 500, 750, 1000, 2000, 5000, 10000]
closest_stops_merged['distance_cat'] = pd.cut(
    closest_stops_merged['distance'],
    bins=distance_bin_edges,
)

In [46]:
# Replace the interval categories with their plain-string labels.
# NOTE(review): presumably needed so explore() can plot/serialize the column as categorical — confirm
closest_stops_merged['distance_cat'] = closest_stops_merged['distance_cat'].astype('str')

Let's now display the shopping centers and the respective stops that are closest to them on an interactive map.

In [47]:
import folium

# Base layer: the shopping centers.
# NOTE(review): no `column` is passed here, so `scheme` and `cmap` probably have no visible effect — confirm
m = shops.explore(
    scheme="naturalbreaks",
    cmap='Accent',
    name="Shopping Centers",  # layer name shown in the layer control
)

# Second layer, drawn on the same map object and coloured by distance bucket
closest_stops_merged.explore(
    m=m,
    column='distance_cat',
    categorical=True,
    cmap='Set1',
    legend=True,
    name="Closest Bus Stops",  # layer name shown in the layer control
)

# Alternative basemap, then the control that toggles layers and basemaps
folium.TileLayer('cartodbpositron', control=True).add_to(m)
folium.LayerControl().add_to(m)

m  # last expression in the cell, so the notebook renders the map

The trend is difficult to discern by visual inspection alone, but there are definitely clusters where stops are closer to shopping centers and areas where they are not.