#### Set up the workspace and import libraries

In [148]:
# setup and resource import
import pandas as pd
import numpy as np
import bokeh
from bokeh.plotting import figure, output_notebook, show
from bokeh.io import push_notebook, show, output_notebook
from bokeh.models import HoverTool, Label
from bokeh.layouts import gridplot
output_notebook(bokeh.resources.INLINE,hide_banner=True)
from bokeh.palettes import Dark2_5 as palette # select a palette
#import geopandas as gpd

# registry of imported datasets: getDataset() stores each dataframe here under its 'key' argument
allDataFrames = {}

ImportError: No module named request

# Utility Functions

### getDataset
* __Parameters__
    * __p__ path to tabulated data (e.g. CSV)
    * __hr__ header row (actual row number, not index value)
    * __tcol__ column number with time interval (actual col number, not index value)
    * __d__ cell delimiter
    * __key__ key under which the dataframe is stored in the dict allDataFrames
    * __preview__ print a preview of the imported data (default FALSE)
    * __collate__ append dataframe to dict allDataFrames with __key__ as key (default TRUE)
* __Returns__ pandas dataframe with imported data
* __Description__ adds local functionality to pd.read_csv()

### `_standardize_EAA_flowVal`
* __Parameters__
    * __a__ numerical value representing flow in thousands of acre-feet per year
* __Returns__ Value in cubic meters per second
* __Description__ EAA data is typically represented as thousands of acre-feet per year

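### `_moving_average`
* __Parameters__
    * __a__ Array of values
    * __[n=5]__ Number of consecutive values (time intervals) to average over
* __Returns__ Array of the n-value averages; it is n-1 elements shorter than __a__
* __Description__ Simple moving average computed from a cumulative sum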
* __Source__ https://stackoverflow.com/questions/14313510/how-to-calculate-moving-average-using-numpy

In [73]:
def getDataset(p,hr,tcol,d,key,preview=False,collate=True):
    """Read a delimited text file into a pandas DataFrame.

    Thin wrapper around pd.read_csv() that accepts actual (1-based) row and
    column numbers and can register the result in the module-level dict
    allDataFrames.

    Parameters
    ----------
    p : path to the tabulated data (e.g. a CSV file)
    hr : header row, as an actual row number (not an index value)
    tcol : number of the column holding the time interval, as an actual
        column number (not an index value); it becomes the DataFrame index
    d : cell delimiter
    key : key under which the DataFrame is stored in allDataFrames
        (only used when collate is true)
    preview : when true, print the first rows of the imported data
    collate : when true, store the DataFrame in allDataFrames[key]

    Returns
    -------
    The imported pandas DataFrame.
    """
    data = pd.read_csv(p, sep=d, header=(hr-1), index_col=(tcol-1))
    if preview:
        # print() with one argument behaves the same on Python 2 and Python 3
        print(data.head())
    if collate:
        allDataFrames[key] = data
    return data

def _standardize_EAA_flowVal(a):
    """Convert an EAA flow value to cubic meters per second.

    a is treated as a flow in thousands of acre-feet per year: it is converted
    as acre-feet per year and the result is then scaled by 1000.
    """
    converted = _acre_feet_year_TO_cubic_meter_second(a)
    return converted*1000

def _acre_feet_year_TO_cubic_meter_second(ac_ft_yr_val):
    return (ac_ft_yr_val*1233.48)/(365*24*3600)

def _moving_average(a, n=5) :
#    ret = np.cumsum(a, dtype=float) ## dtype param not supported in current config
    ret = np.cumsum(a)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n

# dict whose keys are the utility functions themselves; every value is None
utils = dict.fromkeys((_standardize_EAA_flowVal, _moving_average))

In [145]:
def plotData(d,keyArray,t,standardize,moving_avg=0,pw=900,ph=400,
             hover=False,cite=False,hide=False,linked_x_range=None) :
    """Plot every column of the selected dataframes as a line with circle markers.

    Parameters
    ----------
    d : dict of pandas dataframes; each dataframe's index supplies the x values
    keyArray : list of the keys of d to plot, in plotting order; the index
        name of the first one is used as the x-axis label
    t : figure title
    standardize : when true, every value is passed through
        _standardize_EAA_flowVal before plotting; when false the values are
        plotted as they are
    moving_avg : window length of the rolling mean applied to each column;
        0 (default) plots the raw values
    pw, ph : plot width and height
    hover : when true, add a hover tool showing "Year" and "Value"
    cite : source text displayed as "Source: <cite>" in a label on the plot;
        false (default) adds no label
    hide : when true, the figure is built and returned but not shown
    linked_x_range : figure whose x_range this figure should share, or None

    Returns
    -------
    The bokeh figure.
    """
    toolKit=[
        "box_zoom",
        "reset",
        "lasso_select",
        "pan"
    ]

    if hover:
        toolKit.append(HoverTool(
            tooltips=[
                ("Year", "@Year"),
                ("Value", "$y"),
            ],
            mode='vline'
        ))

    if linked_x_range is not None:
        xr = linked_x_range.x_range
    else:
        xr = None

    # NOTE(review): the y-axis label is fixed; it only matches the data when
    # the plotted values really are in m^3/s (e.g. standardize=True) - confirm
    # for unstandardized input.
    p = figure(
        plot_width=pw,
        plot_height=ph,
        title=t,
        y_axis_label="m^3 * s^-1",
        x_axis_label=d[keyArray[0]].index.name,
        logo=None,
        toolbar_location='above',
        tools = toolKit,
        x_range = xr
    )

    colorIndex=0
    # iterate the requested keys (not len(d)) so d and keyArray may differ in size
    for dataKey in keyArray:
        df = d[dataKey] # get current dataframe (indexed)
        for col in list(df):
            yVals = df[col]
            if moving_avg > 0:
                yVals = yVals.rolling(window=moving_avg).mean()
            if standardize:
                # apply() returns a new Series, so the result must be kept
                yVals = yVals.apply(_standardize_EAA_flowVal)
            # cycle through the palette; line and markers of a column share a color
            seriesColor = palette[colorIndex%(len(palette))]
            p.line(
                df.index.values,
                yVals,
                legend=col,
                color=seriesColor,
                muted_color="grey",
                muted_alpha=0.2,
                line_width=3,
                line_join="round"
            )
            p.circle(
                df.index.values,
                yVals,
                legend=col,
                color=seriesColor,
                muted_color="grey",
                muted_alpha=0.2,
                size=6
            )
            colorIndex+=1

    p.legend.location = "top_left"
    p.legend.click_policy="mute" # clicking a legend entry mutes that series

    if cite:
        citation = Label(x=55, y=45, x_units='screen', y_units='screen', text_font_size = '6pt',
            text='Source: '+cite, render_mode='css',
            border_line_color='black', border_line_alpha=1.0,
            background_fill_color='white', background_fill_alpha=0.5)

        p.add_layout(citation)

    if not hide:
        show(p)
    return p

In [146]:
# Load the sample datasets; with collate left at its default each call also
# stores the dataframe in allDataFrames under the given key
wellDischarge = getDataset(p='gw_viz_data/well_discharge.csv', hr=1, tcol=1, d=',', key='wells')
springFlow = getDataset(p='gw_viz_data/springflow.csv', hr=1, tcol=1, d=',', key='springs')

# keys of the datasets to plot, in plotting order
keys = ['wells', 'springs']

# settings read by the inline plotting cell further down
figTitle = 'Trends in the Edwards Aquifer'
plotMovingAvg = 5
applyStandardization = "EAA_flowVal"

In [147]:
# Build both figures without showing them, then stack them in one grid;
# the springs figure shares the x-range of the wells figure
p1 = plotData(
    {'1' : wellDischarge}, ['1'], 'Wells', False,
    moving_avg=5, cite='EAA', hide=True
)
p2 = plotData(
    {'1' : springFlow}, ['1'], 'Springs', False,
    moving_avg=5, hover=True, hide=True, linked_x_range=p1
)

gp = gridplot([[p1],[p2]])
show(gp)

In [12]:
# Inline variant of plotData(): plots every column of the dataframes listed in
# 'keys' (looked up in allDataFrames) as lines on a single figure.
p = figure(
    plot_width=900,
    plot_height=150,
    title=figTitle,
    y_axis_label="m^3 * s^-1",
    x_axis_label=allDataFrames[keys[0]].index.name,
    logo=None,
    tools=[
        "box_zoom",
        "reset",
        "lasso_select",
        "pan"
    ]
)

colorIndex=0
# iterate the listed keys (not len(allDataFrames)): the dict may hold more
# datasets than 'keys' names
for dataKey in keys:
    table = allDataFrames[dataKey] # get current dataframe (indexed)
    for col in list(table):
        yVals = table[col]
        if plotMovingAvg > 0:
            yVals = yVals.rolling(window=plotMovingAvg).mean()
        # any true value (here a string naming the standardization) enables it
        if applyStandardization:
            # apply() returns a new Series, so the result must be kept
            yVals = yVals.apply(_standardize_EAA_flowVal)
        p.line(
            table.index.values,
            yVals,
            legend=col,
            color=palette[colorIndex%(len(palette))], # cycle through the palette
            muted_color="grey",
            muted_alpha=0.2,
            line_width=3,
            line_join="round"
        )
        colorIndex+=1

p.legend.location = "top_left"
p.legend.click_policy="mute"

p.plot_height = 400

show(p)

In [37]:
# Three stacked figures sharing one x-range.
# NOTE(review): years, values, values2 and values3 are not assigned by any
# earlier cell (the recorded output of this cell is a NameError); they are only
# assigned in the "TRASH" cell further down. This cell also rebinds hover, p,
# p1, p2 and gp.
hover = HoverTool(tooltips=[("index", "$index")], mode='vline')

# x_axis_type="datetime"
p = figure(
    plot_width=900, plot_height=150, title="EAA Data",
    tools=[hover,"box_zoom","reset","lasso_select","pan"]
)
# the two lower figures reuse the x-range of the first so they pan/zoom together
p1 = figure(
    plot_width=900, plot_height=150, title="EAA Data",
    x_range=p.x_range,
    tools=["box_zoom","reset","lasso_select","pan"]
)
p2 = figure(
    plot_width=900, plot_height=150, title="EAA Data",
    x_range=p.x_range,
    tools=["box_zoom","reset","lasso_select"]
)

# one line renderer per figure
p.line(
    years, values,
    line_width=3, muted_color="grey", muted_alpha=0.2,
    legend="Recharge", line_join="round"
)
p1.line(
    years, values2,
    line_width=3, line_color='red', muted_color="grey", muted_alpha=0.2,
    legend="Pumpage"
)
p2.line(
    years, values3,
    line_width=3, line_color='green', muted_color="grey", muted_alpha=0.2,
    legend="Springflow"
)

p.legend.location = "top_left"
p.legend.click_policy="mute"

gp = gridplot([[p],[p1],[p2]])

show(gp)

NameError: name 'years' is not defined

In [None]:
%%HTML
<!-- Embeds the arcgis.com ImpactSummary web app in the cell output -->
<iframe width="100%" height="640px" src="https://www.arcgis.com/apps/ImpactSummary/index.html?appid=cc7b5b338bff4f068d20acab013d053e" frameborder="0" scrolling="no"></iframe>

In [8]:
# Read a CSV file with geoplotlib and draw its records with geoplotlib.dot().
# NOTE(review): the recorded run of this cell failed inside geoplotlib with
# "TypeError: ufunc 'radians' not supported for the input types" while
# converting latitudes; presumably the coordinate values read from the CSV are
# not numeric - verify the data file.
import geoplotlib
thedata = geoplotlib.utils.read_csv('geoplotlib_test_data.csv')
geoplotlib.dot(thedata)
geoplotlib.show()

Traceback (most recent call last):
  File "/Users/James/anaconda/lib/python2.7/site-packages/geoplotlib/__init__.py", line 32, in _runapp
    app.start()
  File "/Users/James/anaconda/lib/python2.7/site-packages/geoplotlib/core.py", line 365, in start
    force_zoom=self.geoplotlib_config.requested_zoom)
  File "/Users/James/anaconda/lib/python2.7/site-packages/geoplotlib/core.py", line 672, in fit
    left, top = self.lonlat_to_screen([bbox.west], [bbox.north])
  File "/Users/James/anaconda/lib/python2.7/site-packages/geoplotlib/core.py", line 750, in lonlat_to_screen
    lat_rad = np.radians(lat)
TypeError: ufunc 'radians' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''


In [None]:
#### TRASH ####
# Scratch cell kept for reference only. It reads names (df, datasets,
# delimiter, previewData) that no visible cell of this notebook assigns, and
# it uses Python 2 syntax (xrange, print statement).

# lists passed to p.line() in the "EAA Data" plotting cell
years = []
values = []
values2 = []
values3 = []

# walk df row label by row label and copy fields 0-3 of each row into the lists
for r in xrange(0,len(df)):
    years.append(int(df.loc[r][0])) # field 0 is cast to int
    values.append(df.loc[r][1])
    values2.append(df.loc[r][2])
    values3.append(df.loc[r][3])


## OLD, don't run
# Each ds supplies a file path (ds[0]), a header row number (ds[1]) and an
# index column number (ds[2]); the dataframe is stored under its file path.
for ds in datasets:
    allDataFrames["{}".format(ds[0])]=pd.read_csv(ds[0], sep=delimiter,header=(ds[1]-1), index_col=(ds[2]-1))

# optionally print the first rows of every dataset that was read
if previewData:
    for ds in datasets:
        print allDataFrames[ds[0]].head()

##### sources
* [color palette cycling in bokeh](https://stackoverflow.com/questions/39839409/when-plotting-with-bokeh-how-do-you-automatically-cycle-through-a-color-pallett)
* [link to ArcMap online viz](https://www.arcgis.com/apps/ImpactSummary/index.html?appid=cc7b5b338bff4f068d20acab013d053e&edit=true)

##### notes
* Could add an aggregation feature that sums the values of individual columns per year