In [30]:
import pandas as pd
from bokeh.io import output_notebook, output_file, show
from bokeh.palettes import Spectral11
from bokeh.plotting import figure
from bokeh.models.tickers import SingleIntervalTicker
from bokeh.models import Range1d, FuncTickFormatter, LabelSet, ColumnDataSource

from bokeh.palettes import Category20
from bokeh.core.properties import Dict, Int, String
from bokeh.models.formatters import String, List, Dict, Int
from bokeh.models import FixedTicker

data = pd.read_csv('./NationalParkVisits.csv')

del data['Unnamed: 6']

is_np_visit = []
for name in data['Park']:
    if isinstance(name, str) and 'NP' in name:
        is_np_visit.append(True)
    else:
        is_np_visit.append(False)

is_np_visit_series = pd.Series(is_np_visit)
np_data = data[is_np_visit_series].fillna(0)
np_data = np_data.assign(Year=np_data['Year'].map(lambda x: int(x)))
print(np_data)

            Park  Year Recreation Visitors Tent Campers RV Campers  \
84     Acadia NP  1919              64,000            0          0   
85     Acadia NP  1920              66,500            0          0   
86     Acadia NP  1921              69,836            0          0   
87     Acadia NP  1922              73,779            0          0   
88     Acadia NP  1923              64,200            0          0   
89     Acadia NP  1924              71,758            0          0   
90     Acadia NP  1925              73,673            0          0   
91     Acadia NP  1926             101,256            0          0   
92     Acadia NP  1927             123,699            0          0   
93     Acadia NP  1928             134,897            0          0   
94     Acadia NP  1929             149,554            0          0   
95     Acadia NP  1930             154,734            0          0   
96     Acadia NP  1931             162,238            0          0   
97     Acadia NP  19

In [31]:
np_data = np_data.applymap(lambda x: x.replace(',', '') if isinstance(x, str) else x)

In [32]:
total_visitors = []
for index, row in np_data.iterrows():
    total = int(row['Recreation Visitors']) + int(row['Tent Campers']) + int(row['RV Campers']) + int(row['Backcountry Campers'])
    total_visitors.append(total)
np_data = np_data.assign(TotalVisitors=total_visitors)
np_data

Unnamed: 0,Park,Year,Recreation Visitors,Tent Campers,RV Campers,Backcountry Campers,TotalVisitors
84,Acadia NP,1919,64000,0,0,0,64000
85,Acadia NP,1920,66500,0,0,0,66500
86,Acadia NP,1921,69836,0,0,0,69836
87,Acadia NP,1922,73779,0,0,0,73779
88,Acadia NP,1923,64200,0,0,0,64200
89,Acadia NP,1924,71758,0,0,0,71758
90,Acadia NP,1925,73673,0,0,0,73673
91,Acadia NP,1926,101256,0,0,0,101256
92,Acadia NP,1927,123699,0,0,0,123699
93,Acadia NP,1928,134897,0,0,0,134897


In [33]:
rank = np_data.groupby('Year')['TotalVisitors'].rank(method='first', ascending=False)
rank = rank.apply(lambda x: int(x))
np_data = np_data.assign(ranks=rank)

In [34]:
output_file("bokeh_plot.html")

In [35]:
unique_parks = np_data['Park'].unique()
numlines = len(unique_parks)
xs_data = [np_data[np_data['Park'] == name]['Year'].values for name in unique_parks]
ys_data = [np_data[np_data['Park'] == name]['ranks'].values for name in unique_parks]

In [36]:
colors = Category20.get(20)
colored_parks = [
    'Great Smoky Mountains NP',
    'Grand Canyon NP',
    'Rocky Mountain NP',
    'Yosemite NP',
    'Yellowstone NP',
    'Zion NP',
    'Acadia NP',
    'Hot Springs NP',
    'Denali NP & PRES',
    'Carlsbad Caverns NP',
    'Great Basin NP'
]

line_width=[]
colors_list = []
color_index = 0
for park in unique_parks:
    if park in colored_parks:
        colors_list.append(colors[color_index])
        color_index += 1
        line_width.append(0.8)
    else:
        colors_list.append('#cccccc')
        line_width.append(0.6)


In [37]:
p = figure(plot_width=800, plot_height=800, title = "The most popular national parks")

p.xaxis.ticker = FixedTicker(ticks=[1925, 1950, 1975, 2000])
p.yaxis.ticker = FixedTicker(ticks=[1, 25, 50])
p.xgrid.ticker = FixedTicker(ticks=[1925, 1950, 1975, 2000])
p.ygrid.ticker = FixedTicker(ticks=[1, 25, 50])
p.grid.grid_line_color = '#cccccc'
p.background_fill_color = "whitesmoke"
p.background_fill_alpha = 0.5
p.yaxis.axis_line_alpha = 0
p.xaxis.axis_line_alpha = 0
p.yaxis.axis_label = "Rank"
p.x_range = Range1d(1900, 2050)
p.y_range = Range1d(75, 0)
p.xgrid.bounds = (1925, 2000)
p.ygrid.bounds = (50,1)

p.yaxis.formatter = FuncTickFormatter(code="""
    if (tick % 10 == 1) {
        return tick + "st";
    } else if (tick % 10 == 2) {
        return tick + "nd";
    } else {
        return tick + "th";
    }
""")

def tranform_park_name(name):
    return name.split(' NP')[0].upper()

p.multi_line(xs=xs_data, ys=ys_data, line_width=line_width, line_color=colors_list)
for i in range(0, len(unique_parks)):
    p.line(x=xs_data[i], y=ys_data[i], line_width=line_width[i], line_color=colors_list[i])
    if unique_parks[i] in colored_parks:
        p.text(x=[xs_data[i][-1]], y=[ys_data[i][-1]], text=[tranform_park_name(unique_parks[i])],
               text_font_size='8pt', x_offset = 6, y_offset = 6, text_color=[colors_list[i]])


show(p)