In [7]:
%matplotlib inline
import pandas as pd
import regex as re
import json
import altair as alt
from vega_datasets import data
import matplotlib.pyplot as plt
from ast import literal_eval

#### Read in parks and trails data and do a little preprocessing

In [6]:
# --- Parks ---------------------------------------------------------------
parks = pd.read_csv('data/cleaned/parks.csv')

# `description2` comes back from the CSV as the text of a Python literal
# (quotes included); literal_eval parses that text back into the object.
parks['description2'] = parks['description2'].map(literal_eval)

# Possible later addition: a per-park website column, e.g.
# parks['website'] = 'https://www.google.com/'


def _match_trail_spelling(park_name):
    """Return `park_name` as a string, with 'Wrangell-St. Elias' respelled without the period."""
    return str(park_name).replace('Wrangell-St. Elias', 'Wrangell-St Elias')


# Per the original note, this respelling is needed so park names match the
# names derived from the trail data (the merge key below).
parks['name'] = parks['name'].map(_match_trail_spelling)


# --- Trails --------------------------------------------------------------
# Source: https://github.com/j-ane/trail-data/blob/master/alltrails-data.csv
def _park_key(area_name):
    """Return the text before the first 'National' in `area_name`, stripped of surrounding whitespace."""
    return str(area_name).partition('National')[0].strip()


all_trails = (
    pd.read_csv('data/alltrails-data.csv')
    # free up `name` so it can hold the park-level merge key instead
    .rename(columns={'name': 'trail_name'})
    .assign(name=lambda trails: trails['area_name'].map(_park_key))
)

# --- Merge ---------------------------------------------------------------
# Left join: every park row is kept, whether or not any trail shares its `name`.
# The first column is then dropped by position.
# NOTE(review): presumably that column is a saved index from parks.csv -- confirm,
# since a positional drop would remove a real column if the file layout changes.
parks_trails = (
    parks.merge(all_trails, on='name', how='left')
    .pipe(lambda merged: merged.drop(columns=merged.columns[0]))
)
parks_trails.head()

Unnamed: 0,park,state,name,longitude,latitude,location,date_established,area,visitors_2021,description,...,length,elevation_gain,difficulty_rating,route_type,visitor_usage,avg_rating,num_reviews,features,activities,units
0,Acadia National Park,Maine,Acadia,-68.0493,44.454,"Maine.mw-parser-output .geo-default,.mw-parser...","February 26, 1919",49071,4069098,Covering most of Mount Desert Island and other...,...,2253.076,148.7424,5.0,loop,3.0,5.0,1123.0,"['beach', 'dogs-no', 'forest', 'lake', 'views'...","['birding', 'hiking', 'nature-trips', 'rock-cl...",i
1,Acadia National Park,Maine,Acadia,-68.0493,44.454,"Maine.mw-parser-output .geo-default,.mw-parser...","February 26, 1919",49071,4069098,Covering most of Mount Desert Island and other...,...,6437.36,340.7664,3.0,out and back,4.0,4.5,679.0,"['dogs-leash', 'forest', 'kids', 'partially-pa...","['birding', 'hiking', 'nature-trips', 'trail-r...",i
2,Acadia National Park,Maine,Acadia,-68.0493,44.454,"Maine.mw-parser-output .geo-default,.mw-parser...","February 26, 1919",49071,4069098,Covering most of Mount Desert Island and other...,...,5471.756,28.956,3.0,loop,3.0,4.5,578.0,"['dogs-leash', 'forest', 'lake', 'kids', 'part...","['birding', 'hiking', 'nature-trips', 'trail-r...",i
3,Acadia National Park,Maine,Acadia,-68.0493,44.454,"Maine.mw-parser-output .geo-default,.mw-parser...","February 26, 1919",49071,4069098,Covering most of Mount Desert Island and other...,...,4988.954,181.9656,3.0,loop,3.0,4.5,343.0,"['beach', 'dogs-leash', 'forest', 'kids', 'par...","['birding', 'hiking', 'nature-trips', 'walking']",i
4,Acadia National Park,Maine,Acadia,-68.0493,44.454,"Maine.mw-parser-output .geo-default,.mw-parser...","February 26, 1919",49071,4069098,Covering most of Mount Desert Island and other...,...,12070.05,684.8856,5.0,loop,3.0,4.5,426.0,"['dogs-leash', 'forest', 'views', 'waterfall',...","['hiking', 'nature-trips', 'rock-climbing']",i


In [11]:
# Base layer: U.S. state outlines drawn from the vega_datasets 10m topology.
states = alt.topo_feature(data.us_10m.url, feature='states')

map_title = alt.TitleParams(
    "U.S. National Parks",
    fontSize=16,
    subtitle="Click on a park to learn more about it!",
)

background = (
    alt.Chart(states, title=map_title)
    .mark_geoshape(fill='lightgray', stroke='white')
    .project('albersUsa')
    .properties(width=600, height=450)
)

# parks
# Clicking a circle selects that park by its `park` value. Mount Rainier is
# preselected; with empty=False an empty selection matches no park.
highlight = alt.selection_point(
    on='click',
    fields=["park"],
    value=[{"park": "Mount Rainier National Park"}],
    empty=False,
)

# One circle per park, placed by longitude/latitude and sized by `area`.
# The legend title previously read "Number of visitors (2021)" even though the
# size channel encodes `area`; it now describes the field actually plotted.
# NOTE(review): if circle size was meant to show visitor counts instead, encode
# 'visitors_2021:Q' here and restore the old title.
points = alt.Chart(parks).mark_circle().encode(
    # selected park is pink, every other park is blue
    color=alt.condition(highlight, alt.value('hotpink'), alt.value('cornflowerblue')),
    longitude='longitude:Q',
    latitude='latitude:Q',
    size=alt.Size(
        'area',
        scale=alt.Scale(range=[100, 700]),
        legend=alt.Legend(
            # orient='none' lets legendX/legendY position the legend explicitly
            orient='none',
            legendX=130, legendY=425,
            title="Park area",
            padding=5,
            strokeColor='lightgray',
            direction='horizontal',
            titleAnchor='middle',
        ),
    ),
    tooltip=[
        alt.Tooltip('name', title='Name'),
        alt.Tooltip('state', title='State'),
        alt.Tooltip('longitude:Q', title='Longitude'),
        alt.Tooltip('latitude:Q', title='Latitude'),
    ],
).add_params(highlight)

# Park info panel: text marks for the park currently picked by `highlight`.
# Rows are numbered, filtered to the selection, renumbered, and then only rows
# numbered below 10 are kept; the row number drives the (axis-less) y position.
ranked_text = (
    alt.Chart(parks)
    .mark_text(align='left')
    .encode(y=alt.Y('row_number:O', axis=None))
    .transform_window(row_number='row_number()')
    .transform_filter(highlight)
    .transform_window(row_number='row_number()')
    .transform_filter('datum.row_number<10')
)


def _park_column(text_encoding, heading):
    """Return a copy of `ranked_text` showing `text_encoding` under a left-aligned `heading` title."""
    return ranked_text.encode(text=text_encoding).properties(
        title=alt.Title(text=heading, align='left')
    )


# One single-column "table" per attribute.
park = _park_column('park', 'Park')
state = _park_column('state', 'State')
established = _park_column('date_established', 'Establishment')
area = _park_column(alt.Text('area:N', format=",.0f"), 'Area')
visitors = _park_column(alt.Text('visitors_2021:N', format=",.0f"), 'Visitors in 2021')
description = _park_column('description2', 'Description')
# Defined but not placed in the panel below; the line that would create
# parks['website'] is commented out in the data-loading cell.
website = _park_column('website:N', 'Website')

# Stack the columns vertically (add `website` to the list once that column exists).
text = alt.vconcat(park, state, established, area, visitors, description, spacing=8)

# Trail table: trails of the park currently picked by `highlight`, best-rated first.
#
# The ordering is done in the second window transform: after filtering to the
# selected park, rows are numbered in descending `avg_rating` order, and only
# rows numbered below 10 are kept. Previously a `rank` window was attached to
# the concatenated chart instead; its output field was never used by any
# encoding or filter, so trails appeared in data order rather than by rating.
ranked_text2 = alt.Chart(parks_trails).mark_text(align='left').encode(
    y=alt.Y('row_number:O', axis=None)
).transform_window(
    row_number='row_number()'
).transform_filter(
    highlight
).transform_window(
    row_number='row_number()',
    sort=[alt.SortField('avg_rating', order='descending')]
).transform_filter(
    'datum.row_number<10'
)


def _trail_column(field, heading):
    """Return a copy of `ranked_text2` showing `field` under a left-aligned `heading` title."""
    return ranked_text2.encode(text=field).properties(
        title=alt.Title(text=heading, align='left')
    )


# Data Tables (one single-column chart per trail attribute)
trail = _trail_column('trail_name', 'Trail Name')
length = _trail_column('length', 'Length')
elevation_gain = _trail_column('elevation_gain', 'Elevation Gain')
difficulty_rating = _trail_column('difficulty_rating', 'Difficulty')
visitor_usage = _trail_column('visitor_usage', 'Visitor Usage')
avg_rating = _trail_column('avg_rating', 'Average Rating')
route_type = _trail_column('route_type', 'Route Type')

# Combine data tables side by side; every column shares the row numbering
# above, so the rows line up across columns.
text2 = alt.hconcat(
    trail, length, elevation_gain, difficulty_rating, visitor_usage,
    avg_rating, route_type, spacing=8
)

# Final layout:
#   top row    -> park map (state outlines + park circles) beside the park info panel
#   bottom row -> trail table
park_map = background + points
graph = park_map | text
dashboard = alt.vconcat(graph, text2, spacing=60)
# stroke=None removes the default border drawn around each view
dashboard.configure_view(stroke=None)