# Spatial Analysis

### Let's find a hike that meets these criteria:

* In or around Washington State, USA
* 3-20 miles long
* Outside city limits
* Trailhead is within 1 mile of a transit stop

In [None]:
# a few import statements
import pandas as pd
import geopandas as gpd
import folium

### Gather and prepare Washington State hike data

In [None]:
# Read the hike table that was scraped from the Washington Trails Association (WTA) website.
# Scraper: https://github.com/yoshiohasegawa/wta-scraper

hikes_csv_path = '../Data/2021-04-25_wta_hike_data.csv'
hikes_df = pd.read_csv(hikes_csv_path)

In [None]:
# Peek at the first three rows. LATITUDE and LONGITUDE will be used to build
# the GeoDataFrame, so rows where they are NaN cannot be used.

hikes_df.head(n=3)

In [None]:
# Keep only rows where both coordinate columns are present.

coordinate_columns = ['LATITUDE', 'LONGITUDE']
hikes_df = hikes_df.dropna(subset=coordinate_columns)

In [None]:
# Build point geometries (x = LONGITUDE, y = LATITUDE) and wrap the table
# in a GeoDataFrame.

trailhead_points = gpd.points_from_xy(x=hikes_df['LONGITUDE'], y=hikes_df['LATITUDE'])
hikes_gdf = gpd.GeoDataFrame(hikes_df, geometry=trailhead_points)

In [None]:
# The points came from plain table columns, so no CRS is attached yet.
# Declare it explicitly as WGS 84 latitude/longitude (EPSG:4326).

hikes_gdf = hikes_gdf.set_crs(epsg=4326)

In [None]:
# Draw the trailheads on an interactive map via GeoDataFrame.explore().
# The map zooms to the full extent of the data.

hikes_gdf.explore(
    tiles='CartoDB positron',
)

In [None]:
# Every hike/trailhead should be in or near Washington State, so the two points
# that plot far away are in the wrong location. They could be removed or
# ignored, but let's try to fix them instead.

# The column means show the expected ranges: LATITUDE should be around 47
# and LONGITUDE around -121.

hikes_df.loc[:, ['LATITUDE', 'LONGITUDE']].mean()

In [None]:
# Hovering over the misplaced points in the interactive map reveals their titles;
# looking a title up in the table gives us the row's index value.

is_stehekin = hikes_df['TITLE'].eq('Stehekin to Rainy Pass')
hikes_df.loc[is_stehekin]

In [None]:
# The lookup above shows LATITUDE and LONGITUDE were swapped for this hike,
# and LONGITUDE also needs to be negative. Correct both values by index label.
# NOTE(review): 937 is the index label seen in this particular CSV; re-check it
# if the input file changes.

stehekin_idx = 937
hikes_df.loc[stehekin_idx, 'LATITUDE'] = 48.3093
hikes_df.loc[stehekin_idx, 'LONGITUDE'] = -120.6565

In [None]:
# For this hike the only problem is that LONGITUDE needs to be negative.

is_brooks = hikes_df['TITLE'].eq('Brooks Memorial State Park')
hikes_df.loc[is_brooks]

In [None]:
# Correct the LONGITUDE by index label.
# NOTE(review): 2412 is the index label seen in this particular CSV; re-check it
# if the input file changes.

brooks_idx = 2412
hikes_df.loc[brooks_idx, 'LONGITUDE'] = -120.4

In [None]:
# Rebuild the GeoDataFrame from the corrected table and tag it as EPSG:4326 again.

cleaned_points = gpd.points_from_xy(x=hikes_df['LONGITUDE'], y=hikes_df['LATITUDE'])
hikes_gdf = gpd.GeoDataFrame(hikes_df, geometry=cleaned_points).set_crs('epsg:4326')

In [None]:
# Map the rebuilt GeoDataFrame to confirm the corrected points look right.

hikes_gdf.explore(
    tiles='CartoDB positron',
)

### Find trails that are 3-20 miles long

In [None]:
# Check the range of the DISTANCE column before filtering on it.

hike_distances = hikes_gdf['DISTANCE']
print('Minimum distance:', hike_distances.min())
print('Maximum distance:', hike_distances.max())

In [None]:
%%capture
# Keep only the hikes that meet the distance criteria: 3-20 (inclusive) in the
# DISTANCE column and a DIST_TYPE of 'roundtrip'.
#
# .copy() makes hikes_dist an independent GeoDataFrame, so the column
# assignments below write to it directly instead of to a slice of hikes_gdf
# (which is what raises pandas' SettingWithCopyWarning).

meets_distance = (hikes_gdf['DISTANCE'] >= 3) & (hikes_gdf['DISTANCE'] <= 20)
is_roundtrip = hikes_gdf['DIST_TYPE'] == 'roundtrip'
hikes_dist = hikes_gdf[meets_distance & is_roundtrip].copy()

# create a DIFFICULTY column depending on if the hikes are Short, Medium, or Long to use as the coloring and legend
  # Short hikes are less than or equal to 5 miles
  # Medium hikes are greater than 5 and less than or equal to 10 miles
  # Long hikes are greater than 10 (and less than or equal to 20 miles)

hikes_dist.loc[hikes_dist['DISTANCE'] <= 5, 'DIFFICULTY'] = 'Short'
hikes_dist.loc[(hikes_dist['DISTANCE'] > 5) & (hikes_dist['DISTANCE'] <= 10), 'DIFFICULTY'] = 'Medium'
hikes_dist.loc[hikes_dist['DISTANCE'] > 10, 'DIFFICULTY'] = 'Long'

In [None]:
# Map the distance-filtered hikes, colored by DIFFICULTY, with the TITLE as tooltip.

hike_marker_style = {'radius': 3, 'fill': True}
hikes_dist.explore(
    column='DIFFICULTY',
    cmap='tab20b',
    tiles='CartoDB positron',
    name='Hikes',
    marker_kwds=hike_marker_style,
    tooltip='TITLE',
    tooltip_kwds={'labels': False},
)

### Find trailheads that are outside city limits

In [None]:
# Load the WSDOT city-limits GeoJSON into a GeoDataFrame and show its CRS.
# source: https://geo.wa.gov/datasets/WSDOT::wsdot-city-limits/explore

cityfilename = r'../Data/WSDOT_-_City_Limits.geojson'

# Use a context manager so the file handle is closed as soon as the data is read.
with open(cityfilename) as cityfile:
    cities_gdf = gpd.read_file(cityfile)

cities_gdf.crs

In [None]:
# Show the first three rows of the city-limits table.

cities_gdf.head(n=3)

In [None]:
# Replace the datetime LastUpdate column with 'YYYY-MM-DD' text.
# NOTE(review): presumably needed so the interactive map can serialize the column - confirm.
last_update_text = cities_gdf['LastUpdate'].dt.strftime('%Y-%m-%d')
cities_gdf['LastUpdate'] = last_update_text


In [None]:
# Draw the city limits on an interactive map.

cities_gdf.explore(
    tiles='CartoDB positron',
)

In [None]:
# Run a GeoPandas overlay of the hikes against the cities with how='difference'
# to get the hikes that are not inside city limits.
# more info: https://geopandas.org/en/stable/docs/user_guide/set_operations.html

hikes_not_in_cities = gpd.overlay(hikes_dist, cities_gdf, how='difference')

In [None]:
# Visual check that the overlay worked: draw the city limits first, then add
# the remaining hikes to the same map.

m = cities_gdf.explore(tiles='CartoDB positron', name='Cities')

hike_layer_options = {
    'column': 'DIFFICULTY',
    'cmap': 'tab20b',
    'marker_kwds': {'radius': 3, 'fill': True},
    'tooltip': 'TITLE',
    'tooltip_kwds': {'labels': False},
    'name': "Hikes",
}
hikes_not_in_cities.explore(m=m, **hike_layer_options)

# folium provides the layer control that is added to the map
layer_control = folium.LayerControl()
layer_control.add_to(m)

m

### Find trailheads that are within 1 mile of a transit stop

In [None]:
# source: https://geo.wa.gov/datasets/5926fb7a8cc64c068c6bfa92e72eef56/explore
# transit lines are important too, but we can only access a trailhead if there is an actual stop nearby

# load in transit stops, turn into a GeoDataFrame
tstopsfilename = r'../Data/WSDOT_-_Transit_Stops.geojson'

# Use a context manager so the file handle is closed as soon as the data is read.
with open(tstopsfilename) as tstopsfile:
    tstops_gdf = gpd.read_file(tstopsfile)

# remove transit stops within cities since we don't need them and this will reduce processing time
tstops_gdf = tstops_gdf.overlay(cities_gdf, how='difference')

In [None]:
# Show the first three transit stops that remain after the overlay.
tstops_gdf.head(n=3)

In [None]:
# Replace the datetime LastUpdated column with 'YYYY-MM-DD' text.
# NOTE(review): presumably needed so the interactive map can serialize the column - confirm.
last_updated_text = tstops_gdf['LastUpdated'].dt.strftime('%Y-%m-%d')
tstops_gdf['LastUpdated'] = last_updated_text

In [None]:
# Draw the remaining transit stops on an interactive map.

tstops_gdf.explore(
    tiles='CartoDB positron',
)

In [None]:
# For this step, define an area with a 1-mile radius around each transit stop
# in order to see which trailheads fall inside it.
# However, distance measurements do not work well in a CRS whose units are
# degrees, so first check which CRS the transit stops use.

tstops_gdf.crs

In [None]:
# Reproject both layers into a projected CRS: NAD83 / UTM zone 10N (EPSG:26910).
# to_crs returns new GeoDataFrames, so the originals are left as they were.

hikes_UTM = hikes_not_in_cities.to_crs(epsg=26910)
tstops_UTM = tstops_gdf.to_crs(epsg=26910)

In [None]:
# Display the CRS of the reprojected hikes; notice the units are now meters.

hikes_UTM.crs

In [None]:
# call the GeoPandas buffer method on the reprojected transit stops data,
# passing in (1609.34 meters = 1 mile) and a resolution
tstops_UTM_1mi_series = tstops_UTM.buffer(1609.34, resolution=6)

# make a real copy of the transit stops GeoDataFrame; a plain assignment would
# only create a second name for tstops_UTM, and the geometry replacement below
# would then overwrite the original stop points as well
tstops_UTM_buf = tstops_UTM.copy()

# replace the geometry with the 1-mile buffers
tstops_UTM_buf['geometry'] = tstops_UTM_1mi_series

In [None]:
# Show the first five rows; the geometry column now holds the buffers.
tstops_UTM_buf.head(n=5)

In [None]:
# Draw the 1-mile buffers on an interactive map.

tstops_UTM_buf.explore(
    tiles='CartoDB positron',
)

In [None]:
# Spatially join the trailheads to the 1-mile buffers they intersect.
# The inner join keeps only matching trailheads, with the attributes of both layers.
# NOTE(review): a trailhead inside several buffers may appear once per matching
# stop - confirm whether duplicates matter for the final map.
hike_results = gpd.sjoin(hikes_UTM, tstops_UTM_buf, how='inner', predicate='intersects')

In [None]:
# Put the units into the column names so they show up in the final map tooltips.
tooltip_labels = {'DISTANCE': 'DISTANCE (MI)', 'GAIN': 'GAIN (FT)'}
hike_results = hike_results.rename(columns=tooltip_labels)

In [None]:
# make our final map, showing the hikes we can access, and relevant information in the tooltip
# NOTE(review): `scheme` is a classification option for numeric columns; DIFFICULTY
# holds text labels ('Short'/'Medium'/'Long'), so it likely has no effect here - confirm.

m = hike_results.explore(
    tiles='CartoDB positron', # CartoDB positron basemap (same tiles as the earlier maps)
    cmap='tab20b',
    column='DIFFICULTY',  # color the markers by the Short/Medium/Long label
    scheme='naturalbreaks',
    marker_kwds=dict(radius=6, fill=True),
    tooltip=['TITLE','DISTANCE (MI)','GAIN (FT)','Agency','StopName'])  # columns shown on hover

# the bare name as the last expression displays the map in the notebook
m

In [None]:
# Write the map to a standalone HTML file that anyone with a web browser can open.

output_html_path = '../Output/Hike_Map.html'
m.save(output_html_path)