In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to local modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from random import randint
from bokeh.io import show, output_notebook, output_file, save
from bokeh.models import Band, ColumnDataSource, HoverTool, Legend, Row, Column
from bokeh.models.callbacks import CustomJS
from bokeh.transform import transform
from bokeh.plotting import figure
from bokeh.models import DataRange1d, Range1d, Step, LinearColorMapper, SingleIntervalTicker
from bokeh.palettes import OrRd, Blues, Reds
from bokeh.models.widgets import (DatePicker, Panel, Tabs, Select, 
                                  Slider, DataTable, DateFormatter, TableColumn, HTMLTemplateFormatter,
                                 StringFormatter, Button, Div)

import sys
sys.path.append('../src')
import htmltext

# Random Walks

In [3]:
# Random-walk configuration
dims = 25                # number of independent walks (one column per variable)
step_n = 365*3           # number of steps in every walk
step_set = [-1, 0, 1]    # increments a walk can take at each step
np.random.seed(1)        # fixed seed so the simulated data is reproducible
shock_num = 5            # number of shock pairs applied to each variable further down

# Draw every increment in one call: one row per step, one column per walk
step_shape = (step_n, dims)
steps = np.random.choice(a=step_set, size=step_shape)

# Each walk is the running total of its own increments, accumulated along the
# step axis (axis 0). The array is cumulated directly; wrapping it in a
# single-element np.concatenate first only made a redundant copy.
path = steps.cumsum(axis=0)

# Create Inputs

Create the two inputs:
- Column data source with time series data and limits
- Variable metadata dictionary

In [4]:
def _level_shift(n_rows, low, high):
    """Return a step-shaped offset of length `n_rows`.

    The offset is 0 up to and including a randomly drawn row and a random
    integer from [low, high) on every later row. The magnitude is drawn
    before the onset row so the order of random draws matches the original
    inline expression.
    """
    magnitude = np.random.randint(low, high)
    onset = np.random.randint(n_rows)
    return magnitude * (np.arange(n_rows) > onset)


# Daily index of exactly `step_n` dates ending today (midnight-normalised).
# "Today" is read once and the length is pinned with `periods`, so the index
# always has as many entries as `path` has rows, even if the cell runs across
# midnight.
dt = pd.date_range(end=pd.Timestamp.today().normalize(), periods=step_n, freq='D')
var_names = [f'x{i}' for i in range(dims)]

# DataFrame
df = pd.DataFrame(path, index=dt, columns=var_names)
n_rows = df.shape[0]

# Add `shock_num` pairs of shocks to every walk: one upward shift in [5, 10)
# followed by one downward shift in [-10, -5), each starting at a random row.
for name in var_names:
    for _ in range(shock_num):
        df[name] = df[name] + _level_shift(n_rows, 5, 10)
        df[name] = df[name] + _level_shift(n_rows, -10, -5)

# Create a variable very similar to x0: x0 plus uniform noise in [0, 1) and a
# constant offset of 50, then five extra shifts in [-5, 5) of its own.
similar_var = f'x{dims}'
df[similar_var] = df['x0'] + 1*np.random.rand(n_rows) + 50
for _ in range(5):
    df[similar_var] = df[similar_var] + _level_shift(n_rows, -5, 5)
var_names.append(similar_var)

        
# Calculate EWMA (smoothing factor alpha=0.1) of every column and append the
# smoothed series to the frame as `<name>_ewm`
df_ewm = df.ewm(alpha=0.1).mean()
df_ewm.columns = [i+'_ewm' for i in df_ewm.columns]
df = pd.concat([df, df_ewm], axis=1)

# Calculate UL and LL as EWMA +/- 3 standard deviations of the residual
# (EWMA - raw series). The standard deviation is taken over the whole series,
# not over a separate benchmark window, and is computed once per variable.
for name in var_names:
    ewm = df[f'{name}_ewm']
    half_width = 3*np.std(ewm - df[name])
    df[f'{name}_usl'] = ewm + half_width
    df[f'{name}_lsl'] = ewm - half_width

# Default plotting data: generic `plotVar*` columns initialised from the first
# variable (raw series, EWMA, upper limit, lower limit).
for suffix in ('', '_ewm', '_usl', '_lsl'):
    df[f'plotVar{suffix}'] = df.loc[:, f'{var_names[0]}{suffix}']
df.index.name = 'date'

# Input 1: ColumnDS (the index is reset so `date` becomes a regular column)
cds_tsplot = ColumnDataSource(df.reset_index())

# Category definitions: the member variables of each group and the colour
# used for that group. Category C runs up to and including x{dims}.
categories = {
    'A': {'vars': [f'x{n}' for n in range(5)], 'color': '#4E89AE'},
    'B': {'vars': [f'x{n}' for n in range(5, 12)], 'color': '#FFA372'},
    'C': {'vars': [f'x{n}' for n in range(12, dims+1)], 'color': '#43658B'},
}

# Input 2: Variable metadata
# Every variable starts uncategorised with a white colour ...
var_meta = {}
for name in var_names:
    var_meta[name] = {'category': '', 'color': 'white'}

# ... and is then stamped with the label and colour of the category listing it.
for label, spec in categories.items():
    for member in spec['vars']:
        var_meta[member].update(category=label, color=spec['color'])

In [5]:
# Preview the first rows of the assembled frame.
df.head()

Unnamed: 0_level_0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x23_usl,x23_lsl,x24_usl,x24_lsl,x25_usl,x25_lsl,plotVar,plotVar_ewm,plotVar_usl,plotVar_lsl
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-11-30,0,-1,-1,0,0,-1,-1,0,-1,0,...,7.566756,-5.566756,8.572084,-6.572084,57.228475,44.121221,0,0.0,6.364987,-6.364987
2017-12-01,-1,-1,-1,1,-1,0,-1,0,-1,0,...,8.093072,-5.04044,8.572084,-6.572084,56.86337,43.756116,-1,-0.526316,5.838672,-6.891303
2017-12-02,-1,-1,-2,0,-1,-1,0,-1,-2,0,...,7.898859,-5.234653,8.572084,-6.572084,56.581763,43.474509,-1,-0.701107,5.66388,-7.066094
2017-12-03,-1,0,-3,-1,0,0,0,-2,-1,1,...,8.093072,-5.04044,8.862866,-6.281302,56.444098,43.336845,-1,-0.78802,5.576968,-7.153007
2017-12-04,-2,0,-3,-2,-1,-1,1,-3,-1,0,...,7.964549,-5.168964,8.791859,-6.352309,56.216788,43.109535,-2,-1.083978,5.281009,-7.448966


In [6]:
# Display the variable -> {category, color} mapping.
var_meta

{'x0': {'category': 'A', 'color': '#4E89AE'},
 'x1': {'category': 'A', 'color': '#4E89AE'},
 'x2': {'category': 'A', 'color': '#4E89AE'},
 'x3': {'category': 'A', 'color': '#4E89AE'},
 'x4': {'category': 'A', 'color': '#4E89AE'},
 'x5': {'category': 'B', 'color': '#FFA372'},
 'x6': {'category': 'B', 'color': '#FFA372'},
 'x7': {'category': 'B', 'color': '#FFA372'},
 'x8': {'category': 'B', 'color': '#FFA372'},
 'x9': {'category': 'B', 'color': '#FFA372'},
 'x10': {'category': 'B', 'color': '#FFA372'},
 'x11': {'category': 'B', 'color': '#FFA372'},
 'x12': {'category': 'C', 'color': '#43658B'},
 'x13': {'category': 'C', 'color': '#43658B'},
 'x14': {'category': 'C', 'color': '#43658B'},
 'x15': {'category': 'C', 'color': '#43658B'},
 'x16': {'category': 'C', 'color': '#43658B'},
 'x17': {'category': 'C', 'color': '#43658B'},
 'x18': {'category': 'C', 'color': '#43658B'},
 'x19': {'category': 'C', 'color': '#43658B'},
 'x20': {'category': 'C', 'color': '#43658B'},
 'x21': {'category': 'C

# TimeSeries Plot

In [7]:
# Time-series figure. Every glyph reads the generic `plotVar*` columns of
# `cds_tsplot`, so a different variable is shown by swapping those columns
# rather than rebuilding the figure.
p = figure(
    x_axis_type='datetime',
    plot_height=300,
    plot_width=700,
    active_drag='box_select',
    tools='pan, box_select, box_zoom, reset, undo, redo',
)

# Raw series (thick solid line) and its EWMA (thin dashed line).
# NOTE(review): the size=0 circles are invisible; presumably they exist only so
# the box-select tool has point glyphs to select on `cds_tsplot` - confirm.
p.line(x='date', y='plotVar', color='#7E8A97', nonselection_line_color='grey', line_width=4, alpha=0.9, source=cds_tsplot)
p.circle(x='date', y='plotVar', color='#7E8A97', nonselection_fill_color='grey', size=0, alpha=0.8, source=cds_tsplot)
p.line(x='date', y='plotVar_ewm', color='#7E8A97', nonselection_line_color='grey', line_dash="4 4", line_width=2, alpha=0.9, source=cds_tsplot)

# Upper and lower limits (red lines).
p.line(x='date', y='plotVar_usl', color='#931A25', nonselection_line_color='#E97171', line_width=2, alpha=0.9, source=cds_tsplot)
p.circle(x='date', y='plotVar_usl', color='#931A25', nonselection_fill_color='#E97171', size=0, alpha=0.8, source=cds_tsplot)
p.line(x='date', y='plotVar_lsl', color='#931A25', nonselection_line_color='#E97171', line_width=2, alpha=0.9, source=cds_tsplot)
p.circle(x='date', y='plotVar_lsl', color='#931A25', nonselection_fill_color='#E97171', size=0, alpha=0.8, source=cds_tsplot)

p.xaxis.axis_label = 'Date'
# Label the y axis with the variable the `plotVar*` columns were initialised
# from, instead of a hard-coded name that could drift from the data.
p.yaxis.axis_label = var_names[0]
# Keep a handle on the y axis so a JS callback can relabel it.
plot_yaxis = p.yaxis[0]

# Select widgets

In [8]:
# Drop-down choosing which variable is plotted; starts on the first variable.
select = Select(
    title="Variable",
    options=var_names,
    value=var_names[0],
    width=200,
)

# Drop-down choosing the distance type; defaults to 'dtw'.
select_dist = Select(
    title="Distance Type",
    options=['euclid', 'dtw'],
    value='dtw',
    width=200,
)

# DataTable

In [9]:
# JS snippet mapping a row's `variable` to its colour: one `if` per variable,
# concatenated in `var_meta` order. `{{`/`}}` are str.format escapes that come
# out as single literal braces in the generated JS.
var_rule = ''.join(
    """if(variable == "{var}" ){{return("{color}")}}""".format(var=name, color=meta['color'])
    for name, meta in var_meta.items()
)

# JS snippet mapping the sign of `slope` to a colour. This string is inserted
# into the template as a format *value* and is never itself passed through
# str.format, so its braces are written singly; escaped `{{`/`}}` here would
# reach the generated JS as doubled braces.
slope_rule = """
    if(slope > 0 ){return("#68B0AB")}
    else if(slope == 0 ){return("grey")}
    else if(slope < 0 ){return("#FF7E67")}
"""
    
# One row per variable. The name and group are known up front; the numeric
# columns start as None placeholders.
selection_summary_data = {
    'variable': list(var_meta.keys()),
    'group': [meta['category'] for meta in var_meta.values()],
    'slope': [None] * len(var_meta),
    'usl': [None] * len(var_meta),
    'lsl': [None] * len(var_meta),
    'dist': [None] * len(var_meta),
}
cds_selection_summary_data = ColumnDataSource(selection_summary_data)

# Shared cell template: a <div> showing the cell `value` on a background colour
# returned by an immediately-invoked JS function. `{{` and `}}` are str.format
# escapes for the function's literal braces; `{x}` receives the function body.
_color_cell_template = """
    <div style="background:<%= 
        (function colorfromint(){{{x}}}()) %>; 
        color: black"> 
    <%= value %>
    </div>
    """

# Background chosen from the slope rule.
template_slope = _color_cell_template.format(x=slope_rule)

# Background chosen from the per-variable colour rule.
template_var = _color_cell_template.format(x=var_rule)


# Table columns. The `group` cell uses `template_var` and the `slope` cell uses
# `template_slope` to set a background colour; the remaining columns use the
# default formatter.
columns = [
    TableColumn(
        field="variable", title="variable",  # title previously misspelled "vairable"
        width = 75
    ),
    TableColumn(
        field="group", title="group", 
        formatter=HTMLTemplateFormatter(template=template_var), 
        width = 40,
    ),
    TableColumn(field='dist', title='distance', width = 75),
    TableColumn(
        field='slope', title='slope', 
        formatter=HTMLTemplateFormatter(template=template_slope), 
        width = 75
    ),
    TableColumn(field='usl', title='ucl', width = 20),
    TableColumn(field='lsl', title='lcl', width = 20),
]

# Selectable, sortable summary table backed by `cds_selection_summary_data`.
data_table = DataTable(
    source=cds_selection_summary_data,
    columns=columns,
    fit_columns=True,
    selectable = True,
    sortable = True,
    width=300,
    height=270
)


# `CustomJS` Callbacks

In [10]:
# When a table row is selected, point the variable drop-down at that row's
# variable.
# - `args` is spelled out explicitly instead of looking names up with eval().
# - `selected.indices` is an array: the first selected row is used, and an
#   empty selection is ignored. Indexing the column with the whole array only
#   worked by accident for exactly one selected row.
cds_selection_summary_data.selected.js_on_change(
    'indices',
    CustomJS(
        args=dict(
            select=select,
            cds_selection_summary_data=cds_selection_summary_data,
        ),
        code="""
            const indices = cds_selection_summary_data.selected.indices
            if (indices.length > 0) {
                select.value = cds_selection_summary_data.data['variable'][indices[0]]
                select.change.emit()
            }
    """
    )
)

# When the drop-down value changes, copy the chosen variable's raw / EWMA /
# limit columns into the generic `plotVar*` columns and relabel the y axis.
# - `args` lists only the objects the JS body actually uses, passed explicitly
#   instead of through eval().
# - The JS body is a plain string: it has no Python placeholders, so an
#   f-string prefix would only make any future `{`/`}` in the JS a hazard.
select.js_on_change(
    "value", 
    CustomJS(
        args=dict(cds_tsplot=cds_tsplot, plot_yaxis=plot_yaxis),
        code="""            
            const select_value = cb_obj.value
            cds_tsplot.data['plotVar'] = cds_tsplot.data[select_value]
            cds_tsplot.data['plotVar_ewm'] = cds_tsplot.data[select_value+"_ewm"]
            cds_tsplot.data['plotVar_usl'] = cds_tsplot.data[select_value+"_usl"]
            cds_tsplot.data['plotVar_lsl'] = cds_tsplot.data[select_value+"_lsl"]
            cds_tsplot.change.emit()
            plot_yaxis.axis_label = select_value; 
        """
    )
)

# "Cluster segments" button. Its JS body is assembled from two snippets of the
# `htmltext` module: `sum_calc`, with its `alert` placeholder filled by a
# prompt to box-select a segment, followed by `plot_cluster`.
# `args` is spelled out explicitly instead of looking names up with eval().
button = Button(label='Cluster segments', button_type="primary", width=100, css_classes=['cluster'])
button.js_on_click(
        CustomJS(
        args=dict(
            cds_tsplot=cds_tsplot,
            select=select,
            select_dist=select_dist,
            cds_selection_summary_data=cds_selection_summary_data,
            var_meta=var_meta,
        ),
        code=f""" 
            {htmltext.sum_calc.format(alert='alert("Select a segment of the time series using the box select tool");')}
            {htmltext.plot_cluster}
        """
        )
)

# "Calculate distance" button. Its JS body is the `dist_calc` snippet of the
# `htmltext` module.
# `args` is spelled out explicitly instead of looking names up with eval().
button_dtw = Button(label='Calculate distance', button_type="primary", width=100)
button_dtw.js_on_click(
        CustomJS(
        args=dict(
            cds_tsplot=cds_tsplot,
            select=select,
            select_dist=select_dist,
            cds_selection_summary_data=cds_selection_summary_data,
            var_meta=var_meta,
        ),
        code=f""" 
            {htmltext.dist_calc}
        """
        )
)

# When the selection on the time-series source changes, run the `sum_calc`
# snippet of the `htmltext` module with its `alert` placeholder left empty.
# `args` is spelled out explicitly instead of looking names up with eval().
cds_tsplot.selected.js_on_change(
    'indices', 
    CustomJS(
        args=dict(
            cds_tsplot=cds_tsplot,
            select=select,
            cds_selection_summary_data=cds_selection_summary_data,
            var_meta=var_meta,
        ),
        code=f"""            
            {htmltext.sum_calc.format(alert="")}
        """
    )
)


In [11]:
# Header block for the dashboard; its markup comes from the `htmltext` module.
div_head = Div(text=htmltext.div_head)

# Create Dashboard

In [12]:
# Target file and page title for the exported dashboard.
output_file('../src/ts-cluster.html', title='TimeString')

# Layout, top to bottom: header, the two drop-downs side by side, then the
# plot (cluster button underneath) beside the summary table (distance button
# underneath).
dash = Column(
    div_head,
    Row(select, select_dist),
    Row(
        Column(p, Row(button)),
        Column(data_table, button_dtw),
    ),
)

# Write the page using the template from the `htmltext` module.
save(dash, template=htmltext.template)

'/Users/hasannagib/Desktop/projects/timestring/src/ts-cluster.html'