In [1]:
import pandas as pd
import pickle
from plotly.offline import iplot, init_notebook_mode
import plotly.graph_objs as go
from scipy.stats import kendalltau
from collections import defaultdict
import plotly.tools as tools

init_notebook_mode(connected=True)

In [2]:
florida = pickle.load(open("../data/florida.pkl", "rb"))
alberta = pickle.load(open("../data/alberta.pkl", "rb"))

frames = [florida, alberta]

data = pd.concat(frames)

cols = ['Microcystin (ug/L)', 'Total Nitrogen (ug/L)', 'Total Phosphorus (ug/L)', 'Secchi Depth (m)', 'Total Chlorophyll (ug/L)', 'Temperature (degrees celsius)']

alberta['DATETIME'] = pd.to_datetime(alberta['DATETIME'])
florida['DATETIME'] = pd.to_datetime(florida['DATETIME'])

alberta['YearSeason'] = pd.to_datetime(alberta['DATETIME']).map(lambda dt: dt.replace(day=1)).map(lambda dt: dt.replace(month=3*((dt.month%12 + 3)//3)-2))
alberta['YearMonth'] = pd.to_datetime(alberta['DATETIME']).map(lambda dt: dt.replace(day=1))
florida['YearSeason'] = pd.to_datetime(florida['DATETIME']).map(lambda dt: dt.replace(day=1)).map(lambda dt: dt.replace(month=3*((dt.month%12 + 3)//3)-2))
florida['YearMonth'] = pd.to_datetime(florida['DATETIME']).map(lambda dt: dt.replace(day=1))


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





In [4]:
for c in cols:
    alberta_x=alberta['YearSeason']
    alberta_y=alberta[c]

    florida_x=florida['YearSeason']
    florida_y=florida[c]
    
    layout = go.Layout(
        title= f'{c} vs Date', 
        xaxis={'title':'Date'}, 
        yaxis={'title': c}
    )

    a = go.Scatter(
        x=alberta_x,
        y=alberta_y,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='Alberta'
    )

    f = go.Scatter(
        x=florida_x,
        y=florida_y,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='Florida'
    )

    fig = {
        'data': [f, a],
        'layout': layout
    }

    iplot(fig)
    
    # strip values more than 3 standard deviations away, monthly data
    alberta_2 = alberta[((alberta[c] - alberta[c].mean()) / alberta[c].std()).abs() < 3]
    florida_2 = florida[((florida[c] - florida[c].mean()) / florida[c].std()).abs() < 3]
   
    alberta_x=alberta_2['YearMonth']
    alberta_y=alberta_2[c]

    florida_x=florida_2['YearMonth']
    florida_y=florida_2[c]
    
    layout = go.Layout(
        title= f'{c} vs Date - 3 SD, Monthly', 
        xaxis={'title':'Date'}, 
        yaxis={'title': c}
    )

    a = go.Scatter(
        x=alberta_x,
        y=alberta_y,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='Alberta'
    )

    f = go.Scatter(
        x=florida_x,
        y=florida_y,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='Florida'
    )

    fig = {
        'data': [f, a],
        'layout': layout
    }

    iplot(fig)
    
     # strip values more than 3 standard deviations away, seasonly data
    alberta_x=alberta_2['YearSeason']
    alberta_y=alberta_2[c]

    florida_x=florida_2['YearSeason']
    florida_y=florida_2[c]
    
    layout = go.Layout(
        title= f'{c} vs Date - 3 SD', 
        xaxis={'title':'Date'}, 
        yaxis={'title': c}
    )

    a = go.Scatter(
        x=alberta_x,
        y=alberta_y,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='Alberta'
    )

    f = go.Scatter(
        x=florida_x,
        y=florida_y,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='Florida'
    )

    fig = {
        'data': [f, a],
        'layout': layout
    }

    iplot(fig)




In [13]:
months = alberta.DATETIME.dt.to_period("M")
a_month = alberta.groupby(months)
a_month = a_month.agg(['sum', 'mean', 'std'])

In [61]:
for c in cols:
    fig = tools.make_subplots(rows=2, cols=1, subplot_titles=(f'{c} vs Date', f'% Change of {c} vs Date'))
    
    alberta_x=a_month.index.to_timestamp()
    alberta_y=a_month[c]['mean']
    alberta_y_percent=a_month[c]['mean'].pct_change()

    a = go.Scatter(
        x=alberta_x,
        y=alberta_y,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='Raw Value'
    )
    
    a_percent = go.Scatter(
        x=alberta_x,
        y=alberta_y_percent,
        mode='markers',
        marker={
       'opacity': 0.6,
        },
        name='% Change'
    )
    
    fig.append_trace(a, 1, 1)
    fig.append_trace(a_percent, 2, 1)
     
    fig['layout']['yaxis1'].update(title=c)
    fig['layout']['xaxis1'].update(title='Date')
    fig['layout']['yaxis2'].update(title=f'% Change of {c}')

    iplot(fig)

This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



This is the format of your plot grid:
[ (1,1) x1,y1 ]
[ (2,1) x2,y2 ]



In [11]:
layout = go.Layout(
    title= '% Change of Microcystin (ug/L) per Lake vs Date', 
    xaxis={'title':'Date'}, 
    yaxis={'title': 'Microcystin (ug/L)'}
)

for name, group in grouped:
    graph_num = int(group['graph'].head(1))
    alberta_x=group['DATETIME']
    alberta_y=group['Microcystin (ug/L)'].pct_change()

    a = go.Scatter(
        x=alberta_x,
        y=alberta_y,
        mode='lines',
        marker={
           'opacity': 0.8,
        },
        line = {
            'width': 1.5},
        name=f'{name}'
    )
    
    lakes[graph_num].append(a)

for i in range(1, n_bins):
    fig = {
        'data': lakes[i],
        'layout': layout
    }

    iplot(fig)

In [8]:
n_bins = 6;

alberta2 = alberta.copy()
alberta2['graph'] = pd.cut(alberta2.index, bins=n_bins, labels=False) + 1
grouped = alberta2.groupby('Body of Water Name')

lakes = defaultdict(list)

layout = go.Layout(
    title= 'Microcystin (ug/L) per Lake vs Date', 
    xaxis={'title':'Date'}, 
    yaxis={'title': 'Microcystin (ug/L)'}
)

for name, group in grouped:
    graph_num = int(group['graph'].head(1))
    alberta_x=group['DATETIME']
    alberta_y=group['Microcystin (ug/L)']

    a = go.Scatter(
        x=alberta_x,
        y=alberta_y,
        mode='lines',
        marker={
           'opacity': 0.8,
        },
        line = {
            'width': 1.5},
        name=f'{name}'
    )
    
    lakes[graph_num].append(a)

for i in range(1, n_bins):
    fig = {
        'data': lakes[i],
        'layout': layout
    }

    iplot(fig)


In [8]:
alberta.groupby(['Body of Water Name']).apply(lambda x: kendalltau(x['YearMonth'], x['Microcystin (ug/L)']))


invalid value encountered in double_scalars



Body of Water Name
ADAMSON LAKE                                   (-0.10540925533894598, 0.8005421074231263)
AMISK LAKE                                     (-0.5270462766947298, 0.20650729548542512)
ARM LAKE                                       (-0.18257418583505539, 0.7179816667337768)
ASTOTIN LAKE                                     (0.19999999999999998, 0.624206114766406)
BAPTISTE LAKE - NORTH BASIN                   (0.5280786895875597, 0.0023798164638097045)
BAPTISTE LAKE - SOUTH BASIN                     (0.4385599890615328, 0.01206950966700991)
BEARTRAP LAKE                                  (-0.03077287274483318, 0.8904027809514473)
BEAUVAIS LAKE                                 (-0.18352066281439178, 0.45601440387713243)
BEAVER LAKE                                   (-0.39999999999999997, 0.32718687779030586)
BELLEVUE LAKE                                                   (1.0, 0.1171850871981381)
BIG LAKE EAST/WEST BASIN COMP (AB05EA1550)                    (1.0, 0.04154006700