# Plot interactive hourly/daily average noise/load using Bokeh
The final plot is very busy looking but allows you to click the legend to remove/add lines from the plot

In [8]:
from datetime import datetime, timedelta
import pandas as pd

# Load modules for interactive plotting
import bokeh
from bokeh.models import Line, Span, ColumnDataSource, DatetimeTickFormatter, HoverTool, Range1d, LinearAxis
from bokeh.plotting import figure, output_notebook, show, output_file
import time

output_notebook()

In [2]:
# Set x-axis limits for plotting
# datetime(YEAR, MONTH, DAY)
tmin = datetime(2020, 2, 9)
tmax = datetime(2020, 5, 15)
datadir = './Data/'  

# Load seismic data
fname_n = 'LD.CPNY.2020-02-09.2020-05-15.5_15Hz.BHZ'
df_noise = pd.read_csv(datadir+fname_n+'.csv',parse_dates=['t_cent'])

# Load N.Y.C. electricity data
fname_l = 'load_reduction_hourly_NYC'
df_load = pd.read_csv(datadir+fname_l+'.csv',parse_dates=['Date'])
# df_load.load_resid = df_load.load_resid.fillna(df_load.load_resid.mean())
# Calculat daily averages
dt_hr = (df_load.Date[1]-df_load.Date[0]).seconds/60/60  # Hours between samples
df_load['load_daily_av'] = df_load.load_resid.rolling(int(24/dt_hr),win_type='boxcar').mean()

# NYC stay at home 2020/3/22 8pm EST (UTC - 4)
nyc_SAH = datetime(2020,3,22,20,0) + timedelta(0,4*60*60)

# NYC First COVID-19 death 2020/3/14 EST (UTC - 4)
nyc_1st = datetime(2020,3,14,0,0)

# NYC subway closed 1am-5am 2020/5/6 EST (UTC - 4)
nyc_subway = datetime(2020,5,6,0,0)


# Manually shift timezones
Bokeh does not understand timezones. To get around this we 

1) shift from UTC to the correct timezone  
2) measure the offset from UTC time   
3) make a dummy datetime vector in UTC time      
4) add the offset to the dummy vector    
5) save the shifted dummy vector as the true time file    

It's pretty clumsy but seems to work.

In [3]:
# Convert timestamps to NYC local time
df_noise = df_noise.set_index('t_cent').tz_localize('UTC').tz_convert('America/New_York')
df_noise = df_noise.reset_index()

# Already in NYC local time... but need to trick bokeh to think that it's UTC because will eventually need to shift time zones
df_load = df_load.set_index('Date').tz_localize('UTC')
df_load = df_load.reset_index()

# Manually shift timezone to NYC local time since Bokeh doesn't understand timezones
df_noisenyc = df_noise.copy()
df_noisenyc['t_cent'] = df_noisenyc.t_cent.dt.tz_localize(None)
for ii, f in enumerate(df_noise):
    hr_utcoffset = pd.Timestamp.utcoffset(df_noise.t_cent[ii]).total_seconds()/60/60
    df_noisenyc['t_cent'][ii] = df_noise['t_cent'][ii] + pd.Timedelta(hours=hr_utcoffset)
df_noise = df_noisenyc

## Load data already in local time
# df_loadnyc = df_load.copy()
# df_loadnyc['Date'] = df_loadnyc.Date.dt.tz_localize(None)
# for ii, f in enumerate(df_load):
#     hr_utcoffset = pd.Timestamp.utcoffset(df_load.Date[ii]).total_seconds()/60/60
#     df_loadnyc['Date'][ii] = df_load['Date'][ii] + pd.Timedelta(hours=hr_utcoffset)
# df_load = df_loadnyc

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


## Define Bokeh Figure Properties

In [4]:
# Build interactive plots using Bokeh

# Define figure attributes
p = figure(x_axis_type='datetime',plot_width=800, plot_height=500, sizing_mode = 'scale_width',
           x_range=(df_noise.t_cent.min(),df_noise.t_cent.max()), y_range=([df_noise.disp_avg.min()*1e9*0.7, df_noise.disp_avg.max()*1e9*1.05]),
           x_axis_label='Date', y_axis_label='Average Ground Displacement (nm)', title='NYC Seismic Noise Correlation with Electricity Usage',
           tools=['save','box_zoom','xwheel_zoom','ywheel_zoom','reset','crosshair','pan'])

# Setting the second y axis range name and range
p.extra_y_ranges = {"load_resid": Range1d(start=df_load.load_resid.min()*1.05, end=df_load.load_resid.max()*1.05)}
# Adding the second axis to the plot.  
p.add_layout(LinearAxis(y_range_name="load_resid", axis_label='Electricity Usage Relative to Previous 4 Years (%)'), 'right')



## Plot lines for hourly and daily noise and load

In [5]:
# create a datetime string for hover tool
df_noise['t_centstr'] = df_noise.t_cent.dt.strftime("%Y-%m-%d %H:%M")
df_load['Datestr'] = df_load.Date.dt.strftime("%Y-%m-%d %H:%M")

# Load dataframe into source
source = ColumnDataSource(data={'t_cent':df_noise.t_cent, 'disp_avg':df_noise.disp_avg*1e9, 
                                'daily_average':df_noise.daily_average*1e9, 't_centstr':df_noise.t_centstr})
source_load = ColumnDataSource(data={'Date':df_load.Date, 'load_resid':df_load.load_resid,
                                     'load_daily_av':df_load.load_daily_av, 'Datestr':df_load.Datestr})

# PLOT DATA
# Plot noise hourly
l1 = p.line('t_cent', 'disp_avg', source=source, line_color="lightsteelblue",
             line_width=2, legend_label="Noise Hourly")
p.add_tools(HoverTool(renderers=[l1], tooltips=[("Date", "@t_centstr"),("Noise Level","@disp_avg")]))

# Plot load hourly
l2 = p.line('Date', 'load_resid', source=source_load, line_color="lightcoral",
             line_width=2, y_range_name = 'load_resid', legend_label="Load Hourly")
p.add_tools(HoverTool(renderers=[l2], tooltips=[("Date", "@Datestr"),("Residual Load","@load_resid")]))

# Plot noise daily average
l3 = p.line('t_cent', 'daily_average', source=source, line_color="steelblue",
             line_width=4, legend_label="Noise Daily")
p.add_tools(HoverTool(renderers=[l3], tooltips=[("Date", "@t_centstr"),("Noise Level","@daily_average")]))

# Plot load daily average
l4 = p.line('Date', 'load_daily_av', source=source_load, line_color="firebrick",
             line_width=4, y_range_name = 'load_resid', legend_label="Load Daily")
p.add_tools(HoverTool(renderers=[l4], tooltips=[("Date", "@Datestr"),("Residual Load","@load_daily_av")]))

## Plot vertical lines to mark important times

In [6]:
# Plot vertical lines marking stay at home and first nyc death
# First need to convert datetime to milliseconds for some reason...
nyc_SAH_ms = time.mktime(nyc_SAH.timetuple())*1000
nyc_1st_ms = time.mktime(nyc_1st.timetuple())*1000
nyc_subway_ms = time.mktime(nyc_subway.timetuple())*1000

# Add vertical line for first NYC fatality
nyc_1st_start = Span(location=nyc_1st_ms, dimension='height', line_color='black',
                     line_dash='dashed', line_width=2)
p.add_layout(nyc_1st_start)

# Add vertical line for stay at home order
nyc_SAH_start = Span(location=nyc_SAH_ms, dimension='height', line_color='black',
                     line_dash='dashed', line_width=2)
p.add_layout(nyc_SAH_start)

# Add vertical line when NYC subway closes
nyc_subway_start = Span(location=nyc_subway_ms, dimension='height', line_color='black',
                        line_dash='dashed', line_width=2)
p.add_layout(nyc_subway_start)

## Make legend and plot

Hint: Click legend entries to toggle on/off

In [1]:
# Make legend
p.legend.location = "top_right"
p.legend.click_policy = "hide"
p.yaxis[1].major_label_text_color = "firebrick"
p.yaxis[1].axis_label_text_color = "firebrick"
p.yaxis[0].major_label_text_color = "steelblue"
p.yaxis[0].axis_label_text_color = "steelblue"

# Save local file
output_file(fname_n+'.'+fname_l+'.html')
# Show plot in browser
show(p)

NameError: name 'p' is not defined