In [1]:
# Load data
import pandas
import json
from bokeh.plotting import figure, show, output_file
from bokeh.layouts import column
from bokeh.models import ColumnDataSource, Legend, LegendItem, HoverTool, Range1d
from bokeh.transform import factor_mark, factor_cmap

# See https://www.nps.gov/yell/planyourvisit/campgrounds.htm

# Site records, saved from:
# https://nps-yell.cartodb.com/api/v2/sql?q=SELECT%20*%20FROM%20campgrounds_and_lodging
with open('./campgrounds.json') as fd: 
  sites = json.load(fd)
sites = pandas.DataFrame.from_dict(sites['rows'])
# Fill-time status records, saved from:
# https://nps-yell.cartodb.com/api/v2/sql?q=SELECT%20*%20FROM%20campgrounds_and_lodging_status
with open('closetimes-11-6-2019.json') as fd:
  times = json.load(fd)
times = pandas.DataFrame.from_dict(times['rows'])

# Join status records to their site on the shared npmap_id key
merged = sites.merge(times, on='npmap_id')

# Keep only the columns used below. Copy so the column assignments that
# follow never write into a view of `merged`.
data = merged.filter(['name', 'type', 'fill_datetime', 'updated_by_y']).copy()

# Convert to local time. Parsing with utc=True guarantees a tz-aware column,
# so tz_convert cannot fail on naive or mixed-offset input.
data['fill_datetime'] = pandas.to_datetime(data['fill_datetime'], utc=True).dt.tz_convert('US/Mountain')

# Remove empty records
data = data.loc[data.fill_datetime.notna()].copy()

# Separate fill time and fill date from fill_datetime.
# The date is stored as a tz-naive midnight Timestamp (not datetime.date) so
# that it compares equal to the entries of the date_range used for
# reindexing below.
data['time'] = data['fill_datetime'].dt.time
data['date'] = data['fill_datetime'].dt.tz_localize(None).dt.normalize()

# Set the index and remove duplicate records per location-date.
# Sorting by fill_datetime first means keep='last' retains the latest record.
data = data.set_index(['name', 'date']).sort_values(by='fill_datetime')
data = data.loc[~data.index.duplicated(keep='last')]

# Add all missing dates per location
date_domain = pandas.date_range(data.index.levels[1].min(), data.index.levels[1].max(), freq='D')
index = pandas.MultiIndex.from_product([data.index.levels[0], date_domain], names=['name', 'date'])
data = data.reindex(index)

# Show the location-dates that have no recorded fill time
data.loc[data.time.isna()]


Unnamed: 0_level_0,Unnamed: 1_level_0,type,fill_datetime,updated_by_y,time
name,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bridge Bay Campground,2015-07-11,,NaT,,
Bridge Bay Campground,2015-08-21,,NaT,,
Bridge Bay Campground,2015-08-22,,NaT,,
Bridge Bay Campground,2015-08-23,,NaT,,
Bridge Bay Campground,2015-08-24,,NaT,,
Bridge Bay Campground,2015-08-25,,NaT,,
Bridge Bay Campground,2015-08-26,,NaT,,
Bridge Bay Campground,2015-08-27,,NaT,,
Bridge Bay Campground,2015-08-28,,NaT,,
Bridge Bay Campground,2015-08-29,,NaT,,


In [2]:
# Fill missing values
# Fill `type` within each location only: back-fill first, then forward-fill
# what is left at the end of the group. Both steps run inside the group so a
# value can never leak from one location into the next.
data['type'] = data.groupby('name')['type'].transform(lambda s: s.bfill().ffill())

# Location-dates with no fill record get 23:59 as their fill time
end_of_day = pandas.Timestamp('23:59:00').time()
data['time'] = data['time'].fillna(end_of_day)

data[:5]


Unnamed: 0_level_0,Unnamed: 1_level_0,type,fill_datetime,updated_by_y,time
name,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bridge Bay Campground,2015-07-11,Campground,NaT,,23:59:00
Bridge Bay Campground,2015-07-12,Campground,2015-07-12 06:50:00-06:00,Nathaniel Irwin,06:50:00
Bridge Bay Campground,2015-07-13,Campground,2015-07-13 06:50:00-06:00,Tami Blackford,06:50:00
Bridge Bay Campground,2015-07-14,Campground,2015-07-14 06:48:00-06:00,Kaelyn Johnson,06:48:00
Bridge Bay Campground,2015-07-15,Campground,2015-07-15 06:46:00-06:00,Elizabeth Dreyer,06:46:00


In [3]:
# Transform data for plot
index_dates = data.index.get_level_values('date')
data['year'] = [d.year for d in index_dates]
data['weekaligned_date'] = [
  date 
  + pandas.Timedelta(days=pandas.Timestamp(year=year, month=1, day=1).weekday()) # Align on day of week of Jan 1 for that year
  + pandas.Timedelta(days=1 if pandas.Timestamp(date).is_leap_year and pandas.Timestamp(date) >= pandas.Timestamp(year=year, month=2, day=29) else 0) # Adjust for leap year
  for date, year in zip(index_dates, data['year'])
]
# Flatten all years to 2016 so that they overlap. 2016 is a leapyear making it a superset of dates of the other years
# The Timestamp `freq` argument is not passed: it was removed in pandas 2.0
# and never affected the values stored in these columns.
data['flatdate'] = [pandas.Timestamp(year=2016, month=d.month, day=d.day) for d in index_dates]
data['weekaligned_flatdate'] = [pandas.Timestamp(year=2016, month=d.month, day=d.day) for d in data['weekaligned_date']]

# Convert year to string so it can be used as a categorical, and reset the
# index so name/date become ordinary columns for plotting
data['year'] = data['year'].astype(str)
data = data.reset_index()
data = data.sort_values(['type', 'year'])
data[:5]


Unnamed: 0,name,date,type,fill_datetime,updated_by_y,time,year,weekaligned_date,flatdate,weekaligned_flatdate
0,Bridge Bay Campground,2015-07-11,Campground,NaT,,23:59:00,2015,2015-07-14,2016-07-11,2016-07-14
1,Bridge Bay Campground,2015-07-12,Campground,2015-07-12 06:50:00-06:00,Nathaniel Irwin,06:50:00,2015,2015-07-15,2016-07-12,2016-07-15
2,Bridge Bay Campground,2015-07-13,Campground,2015-07-13 06:50:00-06:00,Tami Blackford,06:50:00,2015,2015-07-16,2016-07-13,2016-07-16
3,Bridge Bay Campground,2015-07-14,Campground,2015-07-14 06:48:00-06:00,Kaelyn Johnson,06:48:00,2015,2015-07-17,2016-07-14,2016-07-17
4,Bridge Bay Campground,2015-07-15,Campground,2015-07-15 06:46:00-06:00,Elizabeth Dreyer,06:46:00,2015,2015-07-18,2016-07-15,2016-07-18


In [8]:
# Prepare figure
# Colors are assigned to location names by position inside build_figure
palette = (
  '#e6194b', '#3cb44b', '#ffe119', '#4363d8', '#f58231', '#911eb4', '#46f0f0',
  '#f032e6', '#bcf60c', '#fabebe', '#008080', '#e6beff', '#9a6324', '#fffac8',
  '#800000', '#aaffc3', '#808000', '#ffd8b1', '#000075', '#808080', '#000000',
)
# Vertical axis spans one full day: 8.64e+7 milliseconds
y_range = Range1d(start=0, end=8.64e+7)
# Horizontal axis spans the flattened (year-2016) dates present in the data
x_range = Range1d(start=data['flatdate'].min(), end=data['flatdate'].max())

def build_figure(data, x_col, title, types):
  """Build a scatter figure of fill time against date.

  One scatter renderer is created per (location name, year) pair. Color
  encodes the location name and marker shape encodes the year. The figure
  gets two legends: one keyed by name (from the renderers) and one keyed by
  year (built explicitly), both with click-to-hide.

  Uses the module-level `palette`, `x_range` and `y_range`.

  Args:
    data: DataFrame with columns `name`, `year`, `type`, `time`, `date`
      and the column named by `x_col`.
    x_col: name of the column to plot on the x axis.
    title: figure title.
    types: collection of `type` values; only rows whose `type` is in this
      collection are plotted.

  Returns:
    The configured bokeh figure.
  """
  # Assign marker to years and color to names. Both lookups wrap around so
  # that having more names than colors, or more years than markers, reuses
  # entries instead of raising IndexError.
  markers = ['hex', 'circle_x', 'triangle', 'square', 'diamond']
  colormap = {name : palette[i % len(palette)] for i, name in enumerate(data.name.unique())}
  markermap = {year: markers[i % len(markers)] for i, year in enumerate(data.year.unique())}

  # Restrict to the requested types up front so that names with no matching
  # rows do not get empty renderers (and empty legend entries).
  selected = data.loc[data.type.isin(types)]
  selected_names = selected.name.unique()
  
  fig = figure(
    title=title,
    width=1800,
    height=800,
    x_range=x_range,
    y_range=y_range,
    x_axis_type='datetime',
    y_axis_type='datetime',
    active_scroll='wheel_zoom',
  )
  
  # Bind data to figure and generate year legend
  # NOTE(review): newer Bokeh releases spell the `legend` keyword below as
  # `legend_label` -- confirm against the installed version before changing.
  legend_items = [
    LegendItem(label=year, renderers=[
      fig.scatter(
        source=ColumnDataSource(selected.loc[(selected.name == name) & (selected.year == year)]),
        x=x_col, 
        y='time',
        color=colormap[name],
        marker=markermap[year],
        fill_alpha=0.4, 
        size=8,
        legend=name,
        name=name,
      ) for name in selected_names
    ]) for year in data.year.unique()
  ]
  # Name legend (created implicitly by the renderers above)
  fig.legend.location = 'top_left'
  fig.legend.click_policy="hide"
  
  # Configure data tool tips
  hover = HoverTool(
    tooltips=[
      ("Location","$name"),
      ("Filled at", "@time{%l:%M %P}"),
      ("On", "@date{%a %F}"),
      #("Updated by", "@updated_by_y"),
    ],
    formatters = {
      'date': 'datetime',
      'time': 'datetime',
    },
  )
  fig.add_tools(hover)
  
  # Year legend: each item toggles every renderer of that year
  legend = Legend(items=legend_items, click_policy="hide")
  fig.add_layout(legend)
  
  # Format axis
  fig.xaxis[0].ticker.desired_num_ticks = 24
  fig.xaxis[0].formatter.days = "%b %d"
  fig.xaxis[0].formatter.months = "%b %d"
  fig.yaxis[0].formatter.hourmin = "%l:%M %P"
  fig.yaxis[0].ticker.desired_num_ticks = 24
  fig.yaxis[0].formatter.hours = "%l:%M %P"
  fig.yaxis[0].formatter.days = "%l:%M %P"
  return fig

# Render both views stacked vertically: raw calendar dates on top,
# week-aligned dates below. Both are restricted to campgrounds.
show(column(*[
  build_figure(data, x_col, title, ["Campground",])
  for x_col, title in (
    ('flatdate', "Yellowstone National Park Fill Times"),
    ('weekaligned_flatdate', "Yellowstone National Park Fill Times - Aligned on week day"),
  )
]))
