# Random notes
 - Tap and box-select tools: hold `Shift` while clicking on bars to add them to the current selection, so that several bars can be selected at once.

In [None]:
import numpy as np
import pandas as pd
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LinearColorMapper, tools, CustomJS
from bokeh.palettes import Oranges256
from bokeh.transform import transform

# Render Bokeh plots inline in the notebook output cells; hide_banner=True
# suppresses the "BokehJS loaded" banner that is otherwise printed.
output_notebook(hide_banner=True)

In [None]:
# Load the merged test data set from disk; every later cell works on `data`.
csv_path = '../data/test_data_merged_10000.csv'
data = pd.read_csv(csv_path)

# Show the loaded frame as the cell output.
data

In [None]:
# Number of missing values per column of `data`.
missing_per_column = data.isna().sum()

# Turn the Series into a two-column frame: 'index' holds the original column
# names and 'Sum' the corresponding count of missing values.
counts = missing_per_column.reset_index(name='Sum')

# Show the summary table as the cell output.
counts

In [None]:
# Bar chart of the number of missing values per column, with a JavaScript
# callback that copies the indices of the selected bars into the Python kernel
# as the variable `selected_indices`.
source = ColumnDataSource(data=counts)
TOOLS = "box_select, tap, reset"

# Alternative layout (kept for reference): horizontal bar plot
# p = figure(title="Missing by column", y_range=counts['index'], width=960, height=960)
# p.hbar(y='index', right='Sum', source=source)
# p.xaxis.axis_label = "Count of missing"

# vertical bar plot
width = 0.5 # width of bars 
p = figure(title="Value bar chart", x_range=counts['index'], tools=TOOLS, width=960, height=960)
p.vbar(x='index', top='Sum', width=width, source=source)
p.xaxis.major_label_orientation = 'vertical'
p.yaxis.axis_label = "Number of missing values"

# Custom JavaScript callback that exports the indices of the selected bars to
# the Jupyter kernel.
#
# The indices are sent as a JSON array literal (e.g. "[3]", "[1,4,7]" or "[]"),
# so `selected_indices` is always a Python list. Concatenating the raw JS array
# to the string would instead produce "selected_indices = 1,4,7" (a tuple),
# "selected_indices = 3" (a bare int that list(...) cannot iterate) or
# "selected_indices = " (a SyntaxError) depending on the selection size.
#
# NOTE(review): this relies on the `IPython.notebook.kernel` JavaScript global,
# which is presumably only available in the classic Jupyter Notebook front end
# - confirm before using it in JupyterLab.
callback = CustomJS(args=dict(source=source), 
                    code="""
                         console.log('Running CustomJS callback now.');
                         const indices = JSON.stringify(Array.from(source.selected.indices));
                         const kernel = IPython.notebook.kernel;
                         kernel.execute("selected_indices = " + indices);
                         """)

# set the callback to run when a selection geometry event occurs in the figure
p.js_on_event('selectiongeometry', callback)

show(p)

In [None]:
# get data of selection
# TODO: list(...) problem when only selecting one bar
# list_selected_indices = list(selected_indices)
# selected_data = counts.iloc[list_selected_indices]
# selected_data
# selected_indices

In [None]:
# Re-display the raw data frame as this cell's output.
data

In [None]:
# Build the data behind the missingness-pattern heatmap.
#
# 1. Flag every cell of `data` as missing (True) or present (False).
data_missingness = data.isnull()

# 2. Collapse identical rows: one row per distinct missingness pattern, with
#    'Count' holding the number of rows in `data` that show that pattern.
data_missingness = data_missingness.groupby(list(data_missingness)).size().reset_index(name='Count')

# 3. Replace each True flag with the pattern's row count (False becomes 0).
#    'Count' is dropped first so that only the per-variable columns remain.
data_missingness = data_missingness.drop(columns='Count').multiply(data_missingness['Count'], axis='index')

# 4. Expose the pattern number as an 'index' column and reshape to long format:
#    one row per (pattern, variable) pair with the count in 'value'.
data_missingness = data_missingness.reset_index()
data_missingness_long = pd.melt(data_missingness, id_vars=['index'])

# The heatmap uses a categorical x-range whose factors are the pattern numbers
# as strings. Bokeh only matches string values against those factors; numeric
# values are treated as raw axis coordinates, which would shift every tile
# half a cell away from its label. Hence the cast to str.
data_missingness_long['index'] = data_missingness_long['index'].astype(str)

source2 = ColumnDataSource(data_missingness_long)


In [None]:
# Heatmap of missingness patterns: one column per pattern, one row per
# variable, coloured by the number of data rows showing that pattern.

# Colour scale: reversed Oranges palette with the first entry replaced by a
# fully transparent colour (alpha 00), so that a value of 0 lets the white
# plot background show through.
# NOTE(review): small non-zero counts that fall into the first palette bin
# presumably also render transparent when the maximum is large - confirm
# whether that is acceptable.
pal = list(reversed(Oranges256))
pal[0] = '#FF000000'
colourmap = LinearColorMapper(palette=pal, low=0, high=data_missingness_long['value'].max())

# Axis factors. 'index' is the helper column added by reset_index() and not a
# variable of the data set, so it is left out of the y-range; otherwise the
# plot gets an empty row labelled "index".
variable_names = [col for col in data_missingness.columns if col != 'index']
pattern_labels = [str(idx) for idx in data_missingness.index]

p2 = figure(title="Missingness pattern", width=960, height=960,
            y_range=variable_names, x_range=pattern_labels)
p2.background_fill_color = '#FFFFFF'
p2.grid.visible = False

# Tiles are drawn slightly larger than one cell (1.05) so that neighbouring
# tiles overlap instead of leaving hairline gaps.
p2.rect(y='variable', x='index', source=source2, width=1.05, height=1.05, fill_color=transform('value', colourmap), 
        line_color=None)
show(p2)