---
### Libraries and dependencies
Run this cell to install all necessary dependencies.

In [None]:
# Core dependencies, each pinned to an exact version.
! pip install pytz==2017.2 fileupload==0.1.2 ipywidgets==6.0.0 pandas==0.20.1 numpy==1.12.1 matplotlib==2.0.2 seaborn==0.8.0
# Install and enable the notebook extensions for the upload widget and for ipywidgets.
! jupyter nbextension install --py fileupload 
! jupyter nbextension enable --py fileupload
! jupyter nbextension install --py widgetsnbextension 
! jupyter nbextension enable --py widgetsnbextension
# NOTE(review): cufflinks and plotly are not pinned; --upgrade installs whatever
# version is current when the cell is run.
! pip install cufflinks --upgrade
! pip install plotly --upgrade

---
### Import packages and create modules

In [None]:
from IPython.display import display, Markdown, FileLink, FileLinks, clear_output, HTML
import io, pytz, os, time, datetime, fileupload
import ipywidgets as widgets
import pandas as pd
import numpy as np
import cufflinks as cf
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import plotly as ply
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Layout
import plotly.figure_factory as ff
from plotly.figure_factory import create_2d_density

# Plot styling: seaborn defaults with colour codes, inline matplotlib figures
# and the 'seaborn-whitegrid' matplotlib style.
sns.set(color_codes=True)
%matplotlib inline
matplotlib.style.use('seaborn-whitegrid')
# Initialise plotly's notebook mode and switch cufflinks to offline plotting.
init_notebook_mode(connected=True)
cf.go_offline()

---
### Import data

In [None]:
# Optional time bounds applied by the upload callback to every loaded file.
# The callback only applies a bound when its value tests `> 0`, so 0 disables it.
start_date = 0
end_date = 0

def _upload():
    """Display the CSV upload widget next to a timezone selector.

    Every uploaded file is parsed, converted to the selected timezone,
    averaged into 2-minute bins and stored in the module-level ``readings``
    dict under its filename; a row for it is then added via ``listFiles``.
    """
    _upload_widget = fileupload.FileUploadWidget()
    _tz_widget = widgets.Dropdown(options=pytz.common_timezones, value='UTC', description='Timezone: ')

    def _cb(change):
        """Handle a change of the upload widget's ``data`` trait."""
        # get file
        filename = change['owner'].filename
        fileData = io.StringIO(change['new'].decode('utf-8'))
        df = pd.read_csv(fileData).set_index('Time')

        # prepare dataframe: the 'Time' column is localized as UTC and then
        # converted to the timezone chosen in the dropdown
        df.index = pd.to_datetime(df.index).tz_localize('UTC').tz_convert(_tz_widget.value)
        df.sort_index(inplace=True)
        df = df.groupby(pd.TimeGrouper(freq='2Min')).aggregate(np.mean)
        df.drop([i for i in df.columns if 'Unnamed' in i], axis=1, inplace=True)

        # Apply the filters cumulatively. Previously each filter was taken
        # from the unfiltered frame, so the end-date filter discarded the
        # start-date filter and both discarded the 2001 cutoff.
        # NOTE(review): the 2001 cutoff presumably removes rows with an
        # invalid clock reading -- confirm.
        df = df[df.index > '2001-01-01T00:00:01Z']
        if start_date > 0: df = df[df.index > start_date]
        if end_date > 0: df = df[df.index < end_date]

        readings[filename] = df
        listFiles(filename)

    # widgets
    _upload_widget.observe(_cb, names='data')
    _hb = widgets.HBox([_upload_widget, _tz_widget, widgets.HTML(' ')])
    display(_hb)

def delFile(b):
    """Click handler for a file row's close button.

    ``b`` is the clicked button; ``listFiles`` attaches the filename as
    ``b.f`` and the row container as ``b.hbl``. Closes every widget in the
    row and removes the file's data from ``readings``.
    """
    clear_output()
    for d in list(b.hbl.children): d.close()
    # Uploading the same filename twice adds a second row but reuses the dict
    # key, so the entry may already have been removed by the other row.
    readings.pop(b.f, None)

def describeFile(b):
    """Click handler: show ``describe()`` statistics for the file named by ``b.f``."""
    clear_output()
    summary = readings[b.f].describe()
    display(summary)
    
# def exportFile(b):
#     export_dir = 'exports'
#     if not os.path.exists(export_dir): os.mkdir(export_dir)
#     savePath = os.path.join(export_dir, b.f+'_clean_'+datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%dT%H%M%S')
# +'.csv')
#     if not os.path.exists(savePath):
#         readings[b.f].to_csv(savePath, sep=",")
#         display(FileLink(savePath))
#     else:
#         display(widgets.HTML(' File Already exists!'))
    
def listFiles(filename):
    """Append a row for ``filename`` to the ``fileList`` box.

    The row holds a close button (handled by ``delFile``), the filename in
    bold and a 'describe' button (handled by ``describeFile``). Both buttons
    carry the filename as ``.f``; the close button also carries the row as
    ``.hbl`` so the handler can close it.
    """
    # clear_output() is intentionally left disabled here.
    close_btn = widgets.Button(icon='close',layout=widgets.Layout(width='30px'))
    close_btn.on_click(delFile)
    close_btn.f = filename

    # The CSV export button is currently disabled together with exportFile:
    #     eb = widgets.Button(description='Export processed CSV', layout=widgets.Layout(width='180px'))
    #     eb.on_click(exportFile)
    #     eb.f = filename

    describe_btn = widgets.Button(description='describe', layout=widgets.Layout(width='80px'))
    describe_btn.on_click(describeFile)
    describe_btn.f = filename

    row = widgets.HBox([close_btn, widgets.HTML(' <b>'+filename+'</b> \t'), describe_btn])
    close_btn.hbl = row

    fileList.children = list(fileList.children) + [row]

# Loaded data frames keyed by filename; filled in by the upload callback.
readings = {}
display(widgets.HTML('<hr><h3>Select CSV files (remember change the timezone!)</h3>'))
_upload()
# fileList is created and displayed after the upload widgets, so the list of
# loaded files renders below them; listFiles() appends one row per upload.
fileList = widgets.VBox([widgets.HTML('<hr>')])
display(fileList)


---
## Plots

To export a plot, right-click on the graph itself and choose "Save as image".

---
### Time Series Plots

In [None]:
# Plot Y limits
# When setLimits is True, add_sensor() pins the y axis range to [minY, maxY].
setLimits = False
maxY = 15000
minY = 0

# [source, sensor] pairs currently selected for the time-series plot.
toshow = []
# meanTable = []

def show_sensors(Source):
    """Populate the sensor dropdown and the date boxes for the chosen source.

    ``Source`` is a key of ``readings``. The sensor dropdown gets the frame's
    columns, and the start/end date boxes get the frame's first and last
    index values.
    """
    frame = readings[Source]
    _sensor_drop.options = list(frame.columns)
    _sensor_drop.source = Source
    _min_date.value = frame.index.min()._short_repr
    _max_date.value = frame.index.max()._short_repr

def clear_all(b):
    """Click handler: clear the cell output and empty the plot selection in place."""
    clear_output()
    toshow[:] = []
        
def add_sensor(b):
    """Click handler shared by the 'Add to Plot' and 'Apply dates' buttons.

    Adds the currently selected [source, sensor] pair to ``toshow`` (if not
    already there), joins all selected series on their index, prints the mean
    of each and draws them in one plotly figure. When ``b.slice_time`` is
    true, the first series is restricted to the start/end date boxes before
    the others are merged onto it.
    """
    clear_output()
    selection = [_sensor_drop.source, _sensor_drop.value]
    if selection not in toshow:
        toshow.append(selection)

    first_source, first_sensor = toshow[0]
    plot_data = readings[first_source].loc[:,(first_sensor,)]

    if b.slice_time:
        plot_data = plot_data[plot_data.index > _min_date.value]
        plot_data = plot_data[plot_data.index < _max_date.value]

    # Remaining selections are merged onto the first one by index.
    for source, sensor in toshow[1:]:
        plot_data = pd.merge(plot_data, readings[source].loc[:,(sensor,)], left_index=True, right_index=True)

    separator = '-------------------------------------'
    print(separator)
    print(' Medias:\n')
    labels = [' ' + source + "\t" + sensor + "\t" for source, sensor in toshow]
    res = plot_data.mean()
    for i, label in enumerate(labels):
        print(label + '%.2f' % (res[i]))
    print(separator)

    # Change columns naming: prefix every column with its source name
    plot_data.columns = [toshow[i][0] + ' - '+ column for i, column in enumerate(plot_data.columns)]

    # The y axis only gets a fixed range when setLimits is enabled.
    yaxis = dict(zeroline=True,title='Measured Value',zerolinecolor='#990000',zerolinewidth=1)
    if setLimits:
        yaxis['range'] = [minY, maxY]
    figure_layout = go.Layout(legend=dict(x=-.1, y=1.2) ,xaxis=dict(title='Time'), yaxis=yaxis)

    figure = plot_data.iplot(kind='scatter', asFigure=True, layout = figure_layout)
    ply.offline.iplot(figure)
    
def reset_time(b):
    """Click handler: reset the date boxes to the full span of the selected source.

    ``b.src`` is the source dropdown attached to the button; its current
    value selects the frame in ``readings``.
    """
    index = readings[b.src.value].index
    _min_date.value = index.min()._short_repr
    _max_date.value = index.max()._short_repr

# Shared width for the selector widgets (the pair-grid cell reuses `layout`).
layout=widgets.Layout(width='350px')

# Source selector; picking a file refreshes the sensor list and the date boxes.
_kit = widgets.Dropdown(options=[k for k in readings.keys()], layout=layout)
_kit_drop = widgets.interactive(show_sensors, Source=_kit, layout=layout)

# Sensor selector with its 'Add to Plot' / 'Clear all' buttons.
_sensor_drop = widgets.Dropdown(layout=layout)
_b_add = widgets.Button(description='Add to Plot', layout=widgets.Layout(width='100px'))
_b_add.on_click(add_sensor)
_b_add.slice_time = False
_b_reset = widgets.Button(description='Clear all', layout=widgets.Layout(width='100px'))
_b_reset.on_click(clear_all)
_sensor_box = widgets.HBox([_sensor_drop, _b_add, _b_reset])

# Date range controls. 'Apply dates' reuses add_sensor with slice_time=True.
_min_date = widgets.Text(description='Start date:', layout=layout)
_max_date = widgets.Text(description='End date:', layout=layout)
# These two buttons used to be created with a chained `... = _b_reset = ...`
# assignment, which silently rebound _b_reset away from the 'Clear all' button.
_b_apply_time = widgets.Button(description='Apply dates', layout=widgets.Layout(width='100px'))
_b_apply_time.on_click(add_sensor)
_b_apply_time.slice_time = True
_b_reset_time = widgets.Button(description='Reset dates', layout=widgets.Layout(width='100px'))
_b_reset_time.on_click(reset_time)
_b_reset_time.src = _kit
_time_box = widgets.HBox([_max_date, _b_reset_time, _b_apply_time])

root_box = widgets.VBox([_kit_drop, _sensor_box, _min_date, _time_box])
display(root_box)

# Extras

In [None]:
# 
def paint(Source):
    """Draw a seaborn pair grid (scatter plots) of every column of one file.

    ``Source`` is the key of the frame in ``readings`` to plot. Previously the
    argument was ignored and the first loaded file was always drawn.
    """
    clear_output()
    if Source not in readings:
        # Nothing loaded yet (the dropdown has no value) or the file was removed.
        print("Please load some data first...")
        return
    sns.set(font_scale=1.4)
    g = sns.PairGrid(readings[Source])
    g = g.map(plt.scatter)

# Source selector for the pair grid; `layout` comes from the time-series cell.
_kit = widgets.Dropdown(options=list(readings.keys()), layout=layout)
_kit_drop = widgets.interactive(paint, Source=_kit, layout=layout)
display(_kit_drop)

# Sensor Correlations

In [None]:
# Module-level defaults for the correlation view.
# NOTE(review): redraw() assigns locals with these same four names from the
# widgets, so these module-level values are not read by the code in this cell.
cropTime = False
min_date = "2001-01-01 00:00:01"
max_date = "2001-01-01 00:00:01"
doubleAxis = True
# NOTE(review): none of these four names is called in the visible cells, which
# use the `widgets.` prefix instead -- confirm before removing.
from ipywidgets import interact, interactive, fixed, interact_manual

def show_sensors_A(Source):
    """Fill the A-side sensor dropdown and reset the crop dates for ``Source``.

    ``Source`` is a key of ``readings``; the crop date boxes are set to the
    first and last index value of that frame.
    """
    frame = readings[Source]
    A_sensors_drop.options = list(frame.columns)
    A_sensors_drop.source = Source
    minCropDate.value = frame.index.min()._short_repr
    maxCropDate.value = frame.index.max()._short_repr
    
def show_sensors_B(Source):
    """Fill the B-side sensor dropdown and reset the crop dates for ``Source``.

    ``Source`` is a key of ``readings``; the crop date boxes are set to the
    first and last index value of that frame.
    """
    frame = readings[Source]
    B_sensors_drop.options = list(frame.columns)
    B_sensors_drop.source = Source
    minCropDate.value = frame.index.min()._short_repr
    maxCropDate.value = frame.index.max()._short_repr
    
def _redraw_xaxis(crop, lo, hi):
    """Build the time-axis settings; the range is pinned to [lo, hi] only when cropping."""
    xaxis = dict(title='Time')
    if crop:
        xaxis['range'] = [lo, hi]
    return xaxis


def _redraw_line_plot(index, values, title, xaxis):
    """Draw one derived series as a plotly line chart with a red zero line."""
    line_layout = go.Layout(
        legend=dict(x=-.1, y=1.2),
        xaxis=xaxis,
        yaxis=dict(zeroline=True, title=title, zerolinecolor='#990000', zerolinewidth=1))
    trace = go.Scatter(x=index, y=values, mode='lines')
    ply.offline.iplot(go.Figure(data=[trace], layout=line_layout))


def redraw(b):
    """Click handler for the 'Redraw' button of the correlation view.

    Joins the selected A and B sensors on their index (optionally cropped to
    the start/end date boxes) and renders, in order: a seaborn regression
    joint plot with the Pearson coefficient printed below it, both series
    over time (B on a secondary y axis when the checkbox is ticked), their
    difference, their ratio and their rolling correlation.
    """
    cropTime = cropTimeCheck.value
    doubleAxis = doubleAxisCheck.value
    min_date = minCropDate.value
    max_date = maxCropDate.value
    mergedData = pd.merge(readings[A_kit.value].loc[:,(A_sensors_drop.value,)], readings[B_kit.value].loc[:,(B_sensors_drop.value,)], left_index=True, right_index=True, suffixes=('_'+A_kit.value, '_'+B_kit.value))
    clear_output()

    if cropTime:
        mergedData = mergedData[mergedData.index > min_date]
        mergedData = mergedData[mergedData.index < max_date]

    # Re-label both series as '<sensor> - <source>' for titles and legends.
    df = pd.DataFrame()
    A = A_sensors_drop.value + ' - ' + A_kit.value
    B = B_sensors_drop.value + ' - ' + B_kit.value
    df[A] = mergedData.iloc[:,0]
    df[B] = mergedData.iloc[:,1]

    #jointplot
    sns.set(font_scale=1.3)
    sns.jointplot(A, B, data=df, kind="reg", color="b", size=12, scatter_kws={"s": 80});
    print("data from " + str(df.index.min()) + " to " + str(df.index.max()))
    pearsonCorr = list(df.corr('pearson')[list(df.columns)[0]])[-1]
    print('Pearson correlation coefficient: ' + str(pearsonCorr))
    print('Coefficient of determination R²: ' + str(pearsonCorr*pearsonCorr))

    # Both series over time; B moves to a secondary axis when requested.
    colorA = 'rgb(0,97,255)'
    colorB = 'rgb(255,165,0)'
    layout_args = dict(
        legend=dict(x=-.1, y=1.2),
        xaxis=_redraw_xaxis(cropTime, min_date, max_date),
        yaxis=dict(zeroline=True, title=A, titlefont=dict(color=colorA), tickfont=dict(color=colorA)))
    traceB_args = dict(x=df[B].index, y=df[B], name=B, line=dict(color=colorB))
    if doubleAxis:
        layout_args['yaxis2'] = dict(title=B, titlefont=dict(color=colorB), tickfont=dict(color=colorB), overlaying='y', side='right')
        traceB_args['yaxis'] = 'y2'
    trace0 = go.Scatter(x=df[A].index, y=df[A], name=A, line=dict(color=colorA))
    trace1 = go.Scatter(**traceB_args)
    figure = go.Figure(data=[trace0, trace1], layout=go.Layout(**layout_args))
    ply.offline.iplot(figure)

    # Delta
    _redraw_line_plot(df.index, df[A]-df[B], 'Delta', _redraw_xaxis(cropTime, min_date, max_date))

    # Ratio (the cropped variant used to be mislabelled 'Measured Value')
    _redraw_line_plot(df.index, df[A]*1./df[B], 'Ratio', _redraw_xaxis(cropTime, min_date, max_date))

    # Rolling correlation over a 12-sample window (the cropped variant used to
    # be mislabelled 'Rolling Average'). The unused empty matplotlib figure
    # that was created here has been removed.
    roll = mergedData.iloc[:,0].rolling(12).corr(mergedData.iloc[:,1])
    _redraw_line_plot(df.index, roll, 'Rolling Correlation', _redraw_xaxis(cropTime, min_date, max_date))
    
if len(readings) < 1: print("Please load some data first...")
else:

    layout=widgets.Layout(width='350px')
    b_redraw = widgets.Button(description='Redraw')
    b_redraw.on_click(redraw)
    doubleAxisCheck = widgets.Checkbox(value=False, description='Secondary y axis', disabled=False)

    # The crop controls are created first: show_sensors_A/B write to
    # minCropDate/maxCropDate, so calling them before these widgets exist
    # raised a NameError on a fresh kernel.
    cropTimeCheck = widgets.Checkbox(value=False,description='Crop Data in X axis', disabled=False)
    minCropDate = widgets.Text(description='Start date:', layout=layout)
    maxCropDate = widgets.Text(description='End date:', layout=layout)

    # Default to the first two loaded files; with a single file both sides
    # start on it instead of failing on a missing second key.
    _sources = list(readings.keys())
    _default_A = _sources[0]
    _default_B = _sources[1] if len(_sources) > 1 else _sources[0]

    A_kit = widgets.Dropdown(options=_sources, layout=widgets.Layout(width='350px') ,value=_default_A)
    A_kit_drop = widgets.interactive(show_sensors_A, Source=A_kit, layout=layout)
    A_sensors_drop = widgets.Dropdown(layout=widgets.Layout(width='350px'))
    show_sensors_A(_default_A)

    B_kit = widgets.Dropdown(options=_sources, layout=widgets.Layout(width='350px'), value=_default_B)
    B_kit_drop = widgets.interactive(show_sensors_B, Source= B_kit, layout=layout)
    B_sensors_drop = widgets.Dropdown(layout=widgets.Layout(width='350px'))
    show_sensors_B(_default_B)

    draw_box = widgets.HBox([b_redraw, doubleAxisCheck], layout=widgets.Layout(justify_content='space-between'))
    kit_box = widgets.HBox([A_kit, widgets.HTML('<h4><< Data source >></h4>') , B_kit], layout=widgets.Layout(justify_content='space-between'))
    sensor_box = widgets.HBox([A_sensors_drop, widgets.HTML('<h4><< Sensor selection >></h4>') , B_sensors_drop], layout=widgets.Layout(justify_content='space-between'))
    crop_box = widgets.HBox([cropTimeCheck, minCropDate, maxCropDate], layout=widgets.Layout(justify_content='space-between'))
    root_box = widgets.VBox([draw_box, kit_box, sensor_box, crop_box])

    display(root_box)

    #redraw(b_redraw)

## To add
Nice-to-have features to add to this analysis:

In [None]:
# 1. Interactive 2D density plot, as in the demo below (it would be interesting
# to select a region and compute the Pearson coefficient on batches of data).
# NOTE(review): this name is imported from a private plotly module and is not
# called below (the call goes through `ff.create_2d_density`) -- confirm it is needed.
from plotly.figure_factory._2d_density import create_2d_density

# Make data points: 2000 samples of noisy t**3 against noisy t**6.
# The noise is drawn without a fixed seed, so the figure differs between runs.
t = np.linspace(-1,1.2,2000)
x = (t**3)+(0.3*np.random.randn(2000))
y = (t**6)+(0.3*np.random.randn(2000))

# Create a figure and render it with plotly's offline mode.
fig = ff.create_2d_density(x, y)
ply.offline.iplot(fig)

# 2. Input boxes for formulas that create new derived channels in the pandas dataframe