In [1]:
import csv

from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions, CustomJS, LogColorMapper, LinearColorMapper
from bokeh.plotting import gmap, figure
from bokeh.layouts import widgetbox, row, column, gridplot, layout
from bokeh.models.widgets import CheckboxGroup
from bokeh.models.widgets import Slider, Button, MultiSelect, Dropdown, Select
from bokeh.models.widgets import Panel, Tabs
from bokeh.models.widgets import Div
from bokeh.models.tools import HoverTool
import numpy as np
import scipy.special
import pickle

import pandas as pd
import geopandas as gp
from collections import defaultdict

from bokeh.palettes import YlGn as palette #YlGn RdYlGn


palette = palette[9]
palette.reverse()
color_mapper = LinearColorMapper(palette=palette)

In [2]:
# load data set
with open('pickles/grid_df.pkl', 'rb') as f:
    data = pickle.load(f)
    
# load metadata
metadata = defaultdict(lambda: ("", "", "", ""))

with open('metadata.csv') as f:
    reader = csv.reader(f)
    for myline in reader:
        metadata[myline[0]] = (myline[1], myline[2], myline[3], myline[4])

        
def getColName(col):
    return metadata[col][0]

def getColType(col):
    return metadata[col][1]

def getColTab(col):
    return metadata[col][2]

def getColDesc(col):
    return metadata[col][3]

In [3]:
# add alpha column (used to show/hide sites on the map)
# initially set all = 0.6 (all sites visible)
# will set to 0.0 for hidden sites as user manipulates sliders
data["alpha"] = 0.6*np.ones_like(data['geometry']).astype(float)

In [4]:
sum(data["substrate"]!="Soft")==0

False

In [5]:
# add lat and lon columns
data["lon"] = data["geometry"].apply(lambda poly: poly.centroid.x)
data["lat"] = data["geometry"].apply(lambda poly: poly.centroid.y)
data["xs"] = [data["geometry"][i].exterior.xy[0].tolist() for i in range(data.shape[0])]
data["ys"]  = [data["geometry"][i].exterior.xy[1].tolist() for i in range(data.shape[0])]

reserved_cols = ["lat", "lon", "alpha", "xs", "ys"]

In [6]:
# filter areas not on the coast
data = data[((data["lon"]<-121.131962) | (data["lat"]<36.216283)) & (data["lat"]<41.755749)]

In [7]:
# create bokeh map
output_file("gmap.html")
map_options = GMapOptions(lat=36.778259, lng=-119.417931, map_type="roadmap", zoom=7)

p = gmap("AIzaSyDVQ4hizSlxjKdLPV0hER9aZ85gSf9345w", map_options, title="California", width=600, height=800, logo=None) 

In [8]:
# preprocess data, step 1
# purpose: eliminate columns that Bokeh can't handle and transform columns with complex data types
cols = data.columns
new_cols = []

hover_cols = []
slider_cols = []

cur_vals = {}
min_vals = {}
max_vals = {}
all_vals = {}

#cols = ["xs", "ys", "alpha", "protected_areas", "land_distance", "county", "critical_species", "nes_estab_pct", "annual_avg_emplvl", "qcew_emp_pct", "unemployment_rate"]

def getDescText(x):
    if type(x)!=list:
        return str(x)
    elif len(x)==0:
        return ""
    elif type(x[0][1])!=str:
        return ", ".join(map(lambda value: "{:.1f}".format(value[2]), x))
    else:
        return ", ".join(map(lambda value: str(value[1]) + " (" + "{:.1f}".format(value[2]) + ")" , x))


for col in cols:
    print (col)
    if col=="geometry" or col=="polygon_id" or data[col].dtype == "bool":
        print ("ignoring column " + col)
    elif col in reserved_cols:
        # these are used internally
        new_cols.append(col)
    elif data[col].dtype == "float64" or data[col].dtype == "int64" or data[col].dtype == "float":
        # na columns are removed
        if not np.isnan(np.mean(data[col])):
            new_cols.append(col)
            slider_cols.append(col)
            hover_cols.append(col)
        else:
            print ("ignoring numerical column " + col + " because it contains NAs")
    elif getColType(col)=="categorical" or getColType(col)=="":
        col_slider = col + "_slider"
        metadata[col_slider] = metadata[col]
        
        data[col_slider] = data[col].apply(lambda x: [value[1] if type(x)==list else x for value in x ])
        data[col_slider] = data[col_slider].apply(lambda x: x if len(x)==0 else x[0])
        data[col_slider] = data[col_slider].apply(lambda x: x[0] if type(x)!=str else x)

        data[col]=data[col].apply(getDescText)

        all_vals[col_slider] = list(np.unique(data[col_slider]))
        new_cols.append(col_slider)
        new_cols.append(col)

        slider_cols.append(col_slider)
        hover_cols.append(col)
    else:
        # this turns Clay's arrays of tuples into values that Bokeh can handle
        col_slider = col + "_slider"
        metadata[col_slider] = metadata[col]

        data[col_slider]=data[col].apply(lambda x: [value[2] for value in x if type(x)==list])
        data[col_slider]=data[col_slider].apply(lambda x: np.NAN if len(x)==0 else min(x))



        data[col]=data[col].apply(getDescText)

        if not np.isnan(np.mean(data[col_slider])):
            new_cols.append(col_slider)
        else:
            # treat as strings
            data[col_slider]=data[col_slider].astype(str) 
            print ("converted " + col_slider + " to string")
            new_cols.append(col_slider)
        
        slider_cols.append(col_slider)
        hover_cols.append(col)
        new_cols.append(col)

data = data[new_cols]

print(slider_cols)
print(hover_cols)

geometry
ignoring column geometry
polygon_id
ignoring column polygon_id
land_distance
protected_areas
county
critical_species
nes_estab_pct
annual_avg_emplvl
qcew_emp_pct
unemployment_rate
biomass
depth
mean_sst
max_sst
min_sst
ndvi
z_min_light
z_mixedl
floor_temp
viable
ignoring column viable
boat_launches
halibut_trawl_sites
marinas
ports
oil_platforms
piers
shoretype
shoretype2
aerial_kelp
admin_kelp_bed
substrate
alpha
lon
lat
xs
ys
['land_distance', 'protected_areas_slider', 'county_slider', 'critical_species_slider', 'nes_estab_pct_slider', 'annual_avg_emplvl_slider', 'qcew_emp_pct_slider', 'unemployment_rate_slider', 'biomass', 'depth', 'mean_sst', 'max_sst', 'min_sst', 'ndvi', 'z_min_light', 'z_mixedl', 'floor_temp', 'boat_launches_slider', 'halibut_trawl_sites_slider', 'marinas_slider', 'ports_slider', 'oil_platforms_slider', 'piers_slider', 'shoretype_slider', 'shoretype2_slider', 'aerial_kelp_slider', 'admin_kelp_bed_slider', 'substrate_slider']
['land_distance', 'protected_

In [9]:
# preprocess data, step 2
# purpose: find max/min for each column, will be used as boundaries for sliders

cols = data.columns
new_cols = []

for col in slider_cols:

    if getColType(col)=="categorical" or getColType(col)=="":
        cur_vals[col] = all_vals[col]
    elif data[col].dtype == "float64" or data[col].dtype == "int64":
        min_vals[col] = np.min(data[col])
        max_vals[col] = np.max(data[col])
        if min_vals[col]!=max_vals[col]:
            cur_vals[col] = min_vals[col] # by default everything set to minimum, so all cells will light up
        else:
            slider_cols.remove(col)
            print ("skipping widget for " + col + " because minval=maxval="+str(min_vals[col]))

skipping widget for aerial_kelp_slider because minval=maxval=0.0


In [10]:
print(cur_vals)
print(min_vals)
print(max_vals)
print(all_vals)

{'land_distance': 0.0, 'protected_areas_slider': 0.0, 'county_slider': ['Alameda', 'Contra Costa', 'Del Norte', 'Humboldt', 'Los Angeles', 'Marin', 'Mendocino', 'Monterey', 'Orange', 'San Diego', 'San Francisco', 'San Luis Obispo', 'San Mateo', 'Santa Barbara', 'Santa Cruz', 'Solano', 'Sonoma', 'Ventura'], 'critical_species_slider': 0.0, 'nes_estab_pct_slider': 0.0, 'annual_avg_emplvl_slider': 0.0, 'qcew_emp_pct_slider': 0.0, 'unemployment_rate_slider': 0.0, 'biomass': 0.0, 'depth': -1129.4, 'mean_sst': 11.929241943359397, 'max_sst': 14.013989257812522, 'min_sst': 9.125994873046897, 'ndvi': -0.0243224230177494, 'z_min_light': 0.0, 'z_mixedl': 9.566535704525455, 'floor_temp': 1.5059999999999998, 'boat_launches_slider': 0.0, 'halibut_trawl_sites_slider': 0.0, 'marinas_slider': 0.0, 'ports_slider': 0.0, 'oil_platforms_slider': 0.0, 'piers_slider': 0.0, 'shoretype_slider': ['Beaches', 'Coastal Marsh', 'Hardened Shores', 'Rocky Shores', 'Tidal Flats'], 'shoretype2_slider': ['coarse-grained 

In [11]:
# add points to map
source = ColumnDataSource(data=data)

mypatches = p.patches(xs="xs", ys="ys", fill_color= {"field": slider_cols[0], "transform":color_mapper}, line_alpha="alpha", fill_alpha="alpha", source=source)


## Callback for sliders
Each time a slider is moved, re-compute alpha value for all cells, based on whether they are within the current value range.

In [12]:
# create callback code
# when a slider is moved, alpha values for all sites are recomputed
# alpha is set to 0.0 for sites that must be hidden based on slider selections

code = """
    debugger;

    var col = cb_obj.name;
    var selection = cb_obj.value;
    if (window.current_values == null) window.current_values = {};

    window.current_values[col]=selection;

"""

for col,val in cur_vals.items():
    if col not in reserved_cols:
        code += "if (window.current_values['"+col+"'] == null) window.current_values['"+col+"'] = "+str(val)+";"

code += """

    var data = source.data;
    var alpha = data['alpha'];

    for (var i = 0; i < alpha.length; i++) {
        alpha[i] = 0.0;
        if(
 """       
    
for col,val in cur_vals.items():
    if col not in reserved_cols:
        if getColType(col)=="categorical" or getColType(col)=="":
            code += "(data['"+col+"'][i]=='' || window.current_values['"+col+"'].indexOf(data['"+col+"'][i])!=-1) && "
        elif data[col].dtype == "float64" or data[col].dtype == "int64":
            code += "(isNaN(data['"+col+"'][i]) || window.current_values['"+col+"']<=data['"+col+"'][i]) && "


            
           
            
code += """
        1) alpha[i] = 0.6;
    }
        
    // emit update of data source
    source.change.emit();
"""


print(code)


    debugger;

    var col = cb_obj.name;
    var selection = cb_obj.value;
    if (window.current_values == null) window.current_values = {};

    window.current_values[col]=selection;

if (window.current_values['land_distance'] == null) window.current_values['land_distance'] = 0.0;if (window.current_values['protected_areas_slider'] == null) window.current_values['protected_areas_slider'] = 0.0;if (window.current_values['county_slider'] == null) window.current_values['county_slider'] = ['Alameda', 'Contra Costa', 'Del Norte', 'Humboldt', 'Los Angeles', 'Marin', 'Mendocino', 'Monterey', 'Orange', 'San Diego', 'San Francisco', 'San Luis Obispo', 'San Mateo', 'Santa Barbara', 'Santa Cruz', 'Solano', 'Sonoma', 'Ventura'];if (window.current_values['critical_species_slider'] == null) window.current_values['critical_species_slider'] = 0.0;if (window.current_values['nes_estab_pct_slider'] == null) window.current_values['nes_estab_pct_slider'] = 0.0;if (window.current_values['annual_avg_emplv

In [13]:
# create tabs

callback = CustomJS(args=dict(source=source), code=code)
tabnames = []
for col in slider_cols:
    print (col)
    if (getColTab(col)!=""):
        tabnames.append(getColTab(col))
tabnames = np.unique(tabnames)

print (tabnames)


land_distance
protected_areas_slider
county_slider
critical_species_slider
nes_estab_pct_slider
annual_avg_emplvl_slider
qcew_emp_pct_slider
unemployment_rate_slider
biomass
depth
mean_sst
max_sst
min_sst
ndvi
z_min_light
z_mixedl
floor_temp
boat_launches_slider
halibut_trawl_sites_slider
marinas_slider
ports_slider
oil_platforms_slider
piers_slider
shoretype_slider
shoretype2_slider
admin_kelp_bed_slider
substrate_slider
['Biological' 'Geographical' 'Human']


In [14]:
# create widgets and histograms


widgets = defaultdict(list)
 
for col in slider_cols:

    the_title = getColName(col)
    if getColType(col)!="" and getColType(col)!="categorical":
        the_title += " (" + getColType(col)+")"
            
    if col in reserved_cols:
        print ("skipping widget for " + col)
    elif col in all_vals.keys():
        # categorical
        short_names = [name[:25]+"..." if len(name)>25 else name for name in all_vals[col]]
        options = list(zip(all_vals[col], short_names))
        multi_select = MultiSelect(title=the_title, options=options, size = 6, width=200, value=all_vals[col], name=col)
        multi_select.js_on_change('value', callback)
        widgets[getColTab(col)].append(multi_select)
    elif data[col].dtype=='O':
        print ("skipping widget for " + col)
    elif data[col].dtype == "float64" or data[col].dtype == "int64":
        step = (max_vals[col]-min_vals[col])/100
                
        widget = Slider(start=min_vals[col], end=max_vals[col], value=cur_vals[col], step=step, title=the_title, name=col, width=180)
        widget.js_on_change('value', callback)
        
        histogram = figure(plot_width=220, plot_height=80, tools="", logo=None, css_classes=[col])
        hist, edges = np.histogram(data[col][~np.isnan(data[col])], density=True, bins=50)
        histogram.quad(top=hist, bottom=0, left=edges[:-1], right=edges[1:])

        widgets[getColTab(col)].append(column(widget, histogram))

In [15]:
hover = HoverTool(tooltips=[])
p.add_tools(hover)
menu = [(col, getColName(col)) for col in hover_cols if col not in reserved_cols]

multi_select = MultiSelect(title="Metrics Hover:", options=menu, size = 30)

units = [item for item in metadata.items() if item[0] not in reserved_cols]
units = {i[0]:i[1][1]  if i[1][1]!="categorical" else "" for i in units}
units_cds = ColumnDataSource(data = dict(keys = list(units.keys()), values = list(units.values())))


callback_m = CustomJS(args=dict(hover=hover, units = units_cds), code="""
    debugger;
    var unit = units.data;
    hover.tooltips = []
    const value = cb_obj.value;
    var names = cb_obj.options.reduce(function(map, obj) {
        map[obj[0]] = obj[1];
        return map;
    }, {});

    for (i=0; i<value.length; ++i){
        const name = value[i]
        var index = unit.keys.indexOf(name)
        hover.tooltips.push([names[name], "@"+name+" "+unit.values[index]])
    }
    """)

multi_select.js_on_change('value', callback_m)

palette_dict = ColumnDataSource(data=dict(palette=['#004529','#006837','#238443',
                                                   '#41ab5d','#78c679','#addd8e',
                                                   '#d9f0a3', '#f7fcb9', '#ffffe5'], 
                                          rpalette=['#ffffe5', '#f7fcb9', '#d9f0a3', 
                                                    '#addd8e', '#78c679', '#41ab5d', 
                                                    '#238443', '#006837', '#004529']))

callback_d = CustomJS(args=dict(patches=mypatches, p=p, source=source, palette = palette_dict), code="""
    //debugger;
    console.log("value", cb_obj.value)
    console.log("transform", patches.glyph.fill_color.transform.palette)
    patches.glyph.fill_color.field = cb_obj.value;
    if (cb_obj.value == "depth"){
        patches.glyph.fill_color.transform.palette = palette.data["palette"]
    } else {
        patches.glyph.fill_color.transform.palette = palette.data["rpalette"]
    }
    
    console.log("what is transform", patches.glyph.fill_color.transform.palette)
    source.change.emit();
    """)



menu = [(col, getColName(col)) for col in slider_cols if getColType(col)!="categorical"]
dropdown = Select(title="Metric Color Selection", value = menu[0][0], options=menu, callback=callback_d)

In [16]:
menu

[('land_distance', 'Land Distance'),
 ('protected_areas_slider', 'Protected Areas'),
 ('county_slider', 'Nearest County'),
 ('critical_species_slider', 'Critical Species'),
 ('nes_estab_pct_slider', 'Fishing Establishment Pct'),
 ('annual_avg_emplvl_slider', 'Annual Avg Empl Level'),
 ('qcew_emp_pct_slider', 'Fishing Employment Pct'),
 ('unemployment_rate_slider', 'County Unemployment Rate'),
 ('biomass', 'Kelp'),
 ('depth', 'Depth'),
 ('mean_sst', 'Mean Sea Surface Temp'),
 ('max_sst', 'Max Sea Surface Temp'),
 ('min_sst', 'Min Sea Surface Temp'),
 ('ndvi', 'NDVI'),
 ('z_min_light', 'Depth of Min Light'),
 ('z_mixedl', 'Mixed Layer Depth'),
 ('floor_temp', 'Sea Floor Temperature'),
 ('boat_launches_slider', 'Boat Launches'),
 ('halibut_trawl_sites_slider', 'Halibut Trawl Sites'),
 ('marinas_slider', 'Marinas'),
 ('ports_slider', 'Ports'),
 ('oil_platforms_slider', 'Oil Platforms'),
 ('piers_slider', 'Piers')]

In [17]:
# show chart and widgets
widget_cols = 3
alltabs = []

for name in tabnames:
    gplot = gridplot(widgets[name], ncols=widget_cols)
    tab = Panel(child=gplot, title=name)
    alltabs.append(tab)
    
menu_info = [(col, getColName(col)) for col in slider_cols if getColDesc(col)!=""]
div_info = Div(text=getColDesc(menu_info[0][0]), width=200, height=300)
                         
code="""
    debugger;
    selection = cb_obj.value;
    newtext = ""
    switch(selection) {
    """
for col in menu_info:
    code+="case '"+col[0]+"': newtext='" + getColDesc(col[0]) + "';break;"
code+="""
    }
    div_info.text = newtext;
"""

callback_info = CustomJS(args=dict(div_info=div_info), code=code)
dropdown_info = Select(value = menu_info[0][0], options=menu_info, callback=callback_info)

tab_info = Panel(child=column(dropdown_info, div_info), title="Info")
alltabs.append(tab_info)
  
tabs = Tabs(tabs=alltabs, width=400)



In [18]:
show(row(column(multi_select, dropdown), p, tabs))