# Great Lakes Surface Temperatures

Daily surface temperature data (in &deg;C) from NOAA CoastWatch (https://coastwatch.glerl.noaa.gov/statistic/statistic.html) are available for each of the Great Lakes, as well as Lake St. Clair.

In [12]:
import pandas as pd

def get_data(lake):
    """Download the surface-temperature CSV for one lake.

    ``lake`` is the short code substituted into the NOAA CoastWatch CSV
    file name.

    Returns a dict with two entries: 'url', the address that was read, and
    'data', the parsed DataFrame whose index (the first CSV column) is
    named 'day'.
    """
    source_url = f'https://coastwatch.glerl.noaa.gov/statistic/csv/all_year_glsea_avg_{lake}_C.csv'

    frame = pd.read_csv(source_url, index_col=[0])
    # name the index so it can be referred to as the 'day' column later
    frame.index.name = 'day'

    return dict(url=source_url, data=frame)


# Fetch every lake up front, keyed by display name. The second item of each
# pair is the one-letter code that get_data() puts into the CSV file name.
lakes = {
    name: get_data(code)
    for name, code in [
        ('Lake Superior', 's'),
        ('Lake Michigan', 'm'),
        ('Lake Huron', 'h'),
        ('Lake Erie', 'e'),
        ('Lake Ontario', 'o'),
        ('Lake St. Clair', 'c'),
    ]
}

Here is an example of the dataset for Lake Superior:

In [13]:
# show the Lake Superior frame: rows are indexed by 'day', one column per year
lakes['Lake Superior']['data']

Unnamed: 0_level_0,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3.29,1.77,1.17,2.13,2.49,3.23,0.98,3.58,3.20,3.61,...,3.04,3.57,3.73,1.60,2.59,3.94,3.88,2.47,3.29,3.23
2,3.28,1.66,1.12,2.19,2.35,3.23,0.92,3.64,3.17,3.50,...,2.99,3.55,3.72,1.41,2.46,3.92,3.46,2.37,3.22,3.20
3,3.27,1.57,1.07,2.06,2.22,3.15,0.87,3.69,3.13,3.48,...,3.48,3.52,3.53,1.27,2.36,3.88,3.33,2.24,3.21,3.09
4,3.27,1.52,1.03,1.97,2.10,3.06,1.35,3.62,3.00,3.49,...,2.35,3.57,3.76,1.37,2.18,3.84,3.28,2.19,3.28,3.09
5,3.22,1.49,1.01,1.91,2.00,2.97,0.63,3.60,3.05,3.53,...,3.48,3.58,3.71,1.36,2.06,3.83,3.13,2.03,3.26,3.06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,2.11,1.63,2.48,3.10,3.60,1.77,3.76,3.36,3.56,3.13,...,3.72,3.91,2.30,3.24,4.02,3.85,2.98,3.95,3.37,
363,2.09,1.51,2.39,2.97,3.36,2.64,3.72,3.29,3.58,2.86,...,3.69,3.85,2.25,3.12,4.00,3.89,2.78,3.70,3.36,
364,2.01,1.40,2.31,2.77,3.22,1.44,3.65,3.30,3.57,2.81,...,3.64,3.76,2.22,3.02,3.97,3.91,2.78,3.60,3.34,
365,1.93,1.31,2.23,2.59,3.20,1.28,3.60,3.26,3.57,2.94,...,3.59,3.69,1.79,2.89,3.95,3.91,2.62,3.53,3.29,


Graphics show each year as an individual plotted time series. Use the slider to change the highlighted year.

In [14]:
from bokeh.io import curdoc, show, output_notebook
from bokeh.models.sources import ColumnDataSource
from bokeh import palettes
from bokeh.plotting import figure
from bokeh.models import FixedTicker, Range1d, HoverTool, Label, Div, Slider
from bokeh.layouts import layout
from math import pi

# render Bokeh output inline in the notebook
output_notebook()

# size of each lake's figure; the height is 70% of the width, truncated to an int
PLOT_WIDTH = 330
PLOT_HEIGHT = int(PLOT_WIDTH * 0.7)

# create base spaghetti plots
def create_baseplot(lake, df):
    """Draw every year of one lake as a thin grey line on a new figure.

    Parameters
    ----------
    lake : str
        Used as the figure title.
    df : pandas.DataFrame
        One column per year. The index must be named 'day'; it becomes the
        'day' column of the data source and is used as the x coordinate.

    Returns
    -------
    tuple
        ``(fig, lines)``: the Bokeh figure and the list of line renderers,
        one per column of ``df`` in column order.
    """
    unselected_kwargs = dict(line_width=1.5,
                             line_alpha=0.5)

    fig = figure(title=lake,
                 plot_height=PLOT_HEIGHT,
                 plot_width=PLOT_WIDTH,
                 tools=[])

    source = ColumnDataSource(df)

    # Plot the columns that are actually present rather than every integer
    # between the first and last year: a gap in the data would otherwise
    # reference a column that does not exist in the source.
    years = [str(col) for col in df.columns]

    # generate palette the size of dataset
    palette = palettes.grey(len(years))

    lines = [fig.line(x='day',
                      y=yr,
                      source=source,
                      color=shade,
                      name=yr,  # the hover tool reads the year from $name
                      **unselected_kwargs)
             for yr, shade in zip(years, palette)]

    # y-axis formatting
    fig.y_range = Range1d(0, 30, bounds='auto')
    fig.yaxis.axis_label = 'Temperature ({})'.format(u'\u2103')  # unicode deg Celsius

    # x-axis formatting
    fig.xaxis.major_label_orientation = pi/2
    fig.x_range = Range1d(0, 366, bounds='auto')

    # locate x-ticks at start of each month (non-leap year): day 1, then
    # add each month's length; the last tick, day 366, is the next 1 January
    ticks = [1]
    for month_length in [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]:
        ticks.append(ticks[-1] + month_length)

    fig.xaxis.ticker = FixedTicker(ticks=ticks)

    # label ticks with month name
    labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'Jan']
    fig.xaxis.major_label_overrides = dict(zip(ticks, labels))

    fig.add_tools(HoverTool(
                tooltips=[
                        ('Year', '$name'),
                        ('Day', '$x'),
                        ('Value', '$y{0.0}')],
                         toggleable=False
                         ))

    fig.toolbar.logo = None

    return fig, lines

# fig, lines = create_baseplot('Lake Superior', lakes['Lake Superior']['data'])
# lakes['Lake Superior']['plot'] = {'fig':fig, 'lines':lines}
# output_notebook()

# # example base plot
# show(lakes['Lake Superior']['plot']['fig'])

In [15]:
def plot_selected(fig, df, year):
    """Overlay one highlighted year on ``fig`` and label it at its peak.

    A thick 'firebrick' line named 'Selected' is drawn for column
    ``str(year)`` of ``df`` (x is the 'day' column), and a text label
    showing the year is placed at that column's maximum value.

    Returns ``(fig, selected, label)`` so the caller can later repoint the
    line and move the label.

    Highlight-line approach based on
    https://stackoverflow.com/a/42321618/2574074
    """
    highlight = 'firebrick'
    column = str(year)

    selected = fig.line(x='day',
                        y=column,
                        source=ColumnDataSource(df),
                        name='Selected',
                        line_color=highlight,
                        line_width=4)

    # the label sits at the year's peak: x is the index value of the
    # maximum, y is the maximum itself
    series = df[column]
    peak_x = series.idxmax(axis=0)
    peak_y = series.max()

    label = Label(x=peak_x, y=peak_y, x_units='data',
                  text=column, render_mode='css',
                  text_color=highlight, text_baseline='bottom')
    fig.add_layout(label)

    return fig, selected, label

In [18]:
def modify_doc(doc):
    """Build the Great Lakes dashboard and add it to ``doc``.

    For every entry in the module-level ``lakes`` dict a base plot is
    created and the most recent year is highlighted. Five of the figures
    (Superior, Michigan, Huron, Erie, Ontario) are arranged in a 2 x 3 grid
    whose last cell holds the title, the credits and a year slider. Moving
    the slider repoints each highlighted line, and its label, to the chosen
    year.

    Parameters
    ----------
    doc : bokeh.document.Document
        The document to populate.

    Notes
    -----
    The slider range and the title use the first and last column of the
    last lake iterated, so all lakes are expected to share the same year
    columns.
    """
    # Renderers are kept in a per-call dict: the callback below must drive
    # the glyphs created by THIS call, not whatever a later call stored on
    # the shared module-level ``lakes`` dict.
    panels = {}

    for lake, entry in lakes.items():
        df = entry['data']

        fig, lines = create_baseplot(lake, df)

        start_year = int(df.columns[0])
        end_year = int(df.columns[-1])
        fig, selected, label = plot_selected(fig, df, end_year)

        panels[lake] = {'fig': fig, 'lines': lines, 'selected': selected, 'label': label}
        # still published on the shared dict, as before
        entry['plot'] = panels[lake]

    # theme the document being built rather than the global curdoc()
    doc.theme = 'light_minimal'

    credits_text_1 = '''Data: NOAA Great Lakes Environmental Research Laboratory'''
    credits_text_2 = '''Graphic: @JacobBruxer'''

    title = '<br>Great Lakes Surface Water Temperatures ({}-{})'.format(start_year, end_year)

    subtitle = '<br>' + credits_text_1 + \
               '<br>URL : https://coastwatch.glerl.noaa.gov/statistic/statistic.html' + \
               '<br><br>' + credits_text_2

    width = PLOT_WIDTH
    height = 100

    title = Div(text=title,
                style={'font-size': '150%', 'color': 'black'},
                width=width, height=height)

    subtitle = Div(text=subtitle,
                   style={'font-size': '70%', 'color': 'black'},
                   width=width, height=height)

    slider = Slider(start=start_year,
                    end=end_year,
                    value=end_year,
                    step=1,
                    title='Select Year',
                    width=width-30,
                    orientation='horizontal')

    # add slider with callback
    def callback(attr, old, new):
        # columns are keyed by the year as a string
        year = str(int(new))

        for lake, panel in panels.items():
            series = lakes[lake]['data'][year]

            panel['selected'].glyph.y = year

            # move the label to the peak of the newly selected year
            panel['label'].x = series.idxmax(axis=0)  # index of max temp
            panel['label'].y = series.max()  # max temp
            panel['label'].text = year

    slider.on_change('value', callback)

    # Lake St. Clair is built above but is not placed in the grid
    grid_order = ['Lake Superior', 'Lake Michigan', 'Lake Huron', 'Lake Erie', 'Lake Ontario']
    plots = [panels[name]['fig'] for name in grid_order]

    # one list of rows; the nested list in the last cell stacks vertically
    grid = layout([[plots[0], plots[1], plots[2]],
                   [plots[3], plots[4], [title, subtitle, slider]]])

    # only the first plot of each row keeps its y-axis label; the others are
    # narrowed by the space the label would have taken
    for idx in (1, 2, 4):
        plots[idx].yaxis.axis_label = None
        plots[idx].plot_width = PLOT_WIDTH - 25

    doc.add_root(grid)

# passing the function (not a figure) to show() has Bokeh call it with a
# Document; modify_doc then adds the layout and the slider's Python callback
show(modify_doc)