# Visualization Design

By: *Tyler Biggs*

---

**Overview**

This notebook goes over the design of the visualizations. It should also serve as a reference for future custom visualizations.

In [40]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell executes, so edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2
from pprint import pprint

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [79]:
import pandas as pd
import numpy as np

import bokeh as bk
import bokeh.io
import bokeh.models
import bokeh.layouts
import bokeh.plotting
# Configure Bokeh to render its output inline in the notebook.
bokeh.io.output_notebook()

In [6]:
# Path hack: put the directory two levels above the working directory at the
# front of the import search path so the project package can be imported.
import os
import sys

project_root = os.path.abspath('../../')
sys.path.insert(0, project_root)

In [15]:
from isadream.isadream.models import utils
from isadream.isadream import io

---

## Dataflow

The data is transferred from the Drupal server as a `.json` file. Those files are placed into a directory as the user requests them. That is, all the datasets that a user selects for any given visualization are placed in a directory. These files are condensed into four dataframes per `.json` file.

In [55]:
# The package ships a demo json file; grab its path and the demo base path,
# then echo both so the reader can see where they resolve to.
nmr_json_demo = utils.SIPOS_DEMO
demo_base_path = utils.BASE_PATH
print(nmr_json_demo, demo_base_path, sep=' \n ')

/home/tyler/git/isadream/isadream/demo_data/demo_json/sipos_2006_talanta_nmr_figs.json 
 /home/tyler/git/isadream/isadream/demo_data/


In [73]:
# Read the demo json file, then parse the result into `node`.
# `node` exposes an `assays` collection, which the cells below iterate over.
# NOTE(review): the exact return types of these two helpers are not visible
# here -- confirm against `isadream.io`.
demo_json = io.read_idream_json(nmr_json_demo)
node = io.parse_json(demo_json)

---

## Viewing the data in each Assay (datafile) per .json

In [74]:
# Render the column data of every assay on the node as its own table.
for assay in node.assays:
    assay_frame = pd.DataFrame.from_records(assay.column_data_source)
    display(assay_frame)

Unnamed: 0,"((Material_Property, Density, g/cm^3), ((Fake, 2.0), (Fake, 1.0)))","((Material_Property, Poor, Quality), ((Fake, 2.0), (Fake, 1.0)))","((Material_Property, Purity_by_Weight, Percent), ((Al(III), 1.0),))","((Measurement, ppm), ((Al(III), 1.0),))","((Measurement, ppm), ((Fake, 2.0), (Fake, 1.0)))","((Measurement, ppm), ((K+, 1.0), (OH-, 1.0)))","((Measurement_Condition, Molar), ((Al(III), 1.0),))","((Measurement_Condition, Molar), ((Fake, 2.0), (Fake, 1.0)))","((Measurement_Condition, Molar), ((K+, 1.0), (OH-, 1.0)))"
0,1.05,Poor,0.98,79.9,79.9,79.9,0.005,0.006,2.93
1,1.05,Poor,0.98,79.84,79.84,79.84,0.005,0.006,4.92
2,1.05,Poor,0.98,79.72,79.72,79.72,0.005,0.006,6.85
3,1.05,Poor,0.98,79.66,79.66,79.66,0.005,0.006,9.13
4,1.05,Poor,0.98,79.66,79.66,79.66,0.005,0.006,10.71


Unnamed: 0,"((Material_Property, Density, g/cm^3), ((Fake, 2.0), (Fake, 1.0)))","((Material_Property, Poor, Quality), ((Fake, 2.0), (Fake, 1.0)))","((Material_Property, Purity_by_Weight, Percent), ((Al(III), 1.0),))","((Measurement, ppm), ((Al(III), 1.0),))","((Measurement, ppm), ((Fake, 2.0), (Fake, 1.0)))","((Measurement, ppm), ((OH-, 1.0), (Li+, 1.0)))","((Measurement_Condition, Molar), ((Al(III), 1.0),))","((Measurement_Condition, Molar), ((Fake, 2.0), (Fake, 1.0)))","((Measurement_Condition, Molar), ((OH-, 1.0), (Li+, 1.0)))"
0,1.05,Poor,0.98,79.92,79.92,79.92,0.005,0.006,0.66
1,1.05,Poor,0.98,79.84,79.84,79.84,0.005,0.006,1.1
2,1.05,Poor,0.98,79.66,79.66,79.66,0.005,0.006,1.64
3,1.05,Poor,0.98,79.54,79.54,79.54,0.005,0.006,2.14
4,1.05,Poor,0.98,79.36,79.36,79.36,0.005,0.006,2.59
5,1.05,Poor,0.98,79.13,79.13,79.13,0.005,0.006,3.11
6,1.05,Poor,0.98,78.83,78.83,78.83,0.005,0.006,3.59
7,1.05,Poor,0.98,78.47,78.47,78.47,0.005,0.006,4.11


Unnamed: 0,"((Material_Property, Density, g/cm^3), ((Fake, 2.0), (Fake, 1.0)))","((Material_Property, Poor, Quality), ((Fake, 2.0), (Fake, 1.0)))","((Material_Property, Purity_by_Weight, Percent), ((Al(III), 1.0),))","((Measurement, ppm), ((Al(III), 1.0),))","((Measurement, ppm), ((Fake, 2.0), (Fake, 1.0)))","((Measurement, ppm), ((Na+, 1.0), (OH-, 1.0)))","((Measurement_Condition, Molar), ((Al(III), 1.0),))","((Measurement_Condition, Molar), ((Fake, 2.0), (Fake, 1.0)))","((Measurement_Condition, Molar), ((Na+, 1.0), (OH-, 1.0)))"
0,1.05,Poor,0.98,79.54,79.54,79.54,0.005,0.006,4.98
1,1.05,Poor,0.98,79.98,79.98,79.98,0.005,0.006,0.93
2,1.05,Poor,0.98,79.13,79.13,79.13,0.005,0.006,6.97
3,1.05,Poor,0.98,78.45,78.45,78.45,0.005,0.006,8.96
4,1.05,Poor,0.98,77.67,77.67,77.67,0.005,0.006,10.95
5,1.05,Poor,0.98,76.47,76.47,76.47,0.005,0.006,12.99
6,1.05,Poor,0.98,74.53,74.53,74.53,0.005,0.006,14.92
7,1.05,Poor,0.98,73.14,73.14,73.14,0.005,0.006,16.97
8,1.05,Poor,0.98,71.32,71.32,71.32,0.005,0.006,18.92


---

### Getting Subsets

In [75]:
# Load the first assay's column data, promote its tuple column labels to a
# MultiIndex, and transpose so that those labels become the row index.
first_assay_records = node.assays[0].column_data_source
assay_df = pd.DataFrame.from_records(first_assay_records)
assay_df.columns = pd.MultiIndex.from_tuples(assay_df.columns)
df = assay_df.T
df

Unnamed: 0,Unnamed: 1,0,1,2,3,4
"(Material_Property, Density, g/cm^3)","((Fake, 2.0), (Fake, 1.0))",1.05,1.05,1.05,1.05,1.05
"(Material_Property, Poor, Quality)","((Fake, 2.0), (Fake, 1.0))",Poor,Poor,Poor,Poor,Poor
"(Material_Property, Purity_by_Weight, Percent)","((Al(III), 1.0),)",0.98,0.98,0.98,0.98,0.98
"(Measurement, ppm)","((Al(III), 1.0),)",79.9,79.84,79.72,79.66,79.66
"(Measurement, ppm)","((Fake, 2.0), (Fake, 1.0))",79.9,79.84,79.72,79.66,79.66
"(Measurement, ppm)","((K+, 1.0), (OH-, 1.0))",79.9,79.84,79.72,79.66,79.66
"(Measurement_Condition, Molar)","((Al(III), 1.0),)",0.005,0.005,0.005,0.005,0.005
"(Measurement_Condition, Molar)","((Fake, 2.0), (Fake, 1.0))",0.006,0.006,0.006,0.006,0.006
"(Measurement_Condition, Molar)","((K+, 1.0), (OH-, 1.0))",2.93,4.92,6.85,9.13,10.71


In [95]:
# Cross-section of the rows labelled ('Measurement_Condition', 'Molar') in the
# first index level; the result keeps only the second index level as its index.
molar_df = df.xs(('Measurement_Condition', 'Molar'))
molar_df

Unnamed: 0,0,1,2,3,4
"((Al(III), 1.0),)",0.005,0.005,0.005,0.005,0.005
"((Fake, 2.0), (Fake, 1.0))",0.006,0.006,0.006,0.006,0.006
"((K+, 1.0), (OH-, 1.0))",2.93,4.92,6.85,9.13,10.71


In [96]:
# Cross-section of the rows labelled ('Measurement', 'ppm') in the first index
# level; the result keeps only the second index level as its index.
ppm_df = df.xs(('Measurement', 'ppm'))
ppm_df

Unnamed: 0,0,1,2,3,4
"((Al(III), 1.0),)",79.9,79.84,79.72,79.66,79.66
"((Fake, 2.0), (Fake, 1.0))",79.9,79.84,79.72,79.66,79.66
"((K+, 1.0), (OH-, 1.0))",79.9,79.84,79.72,79.66,79.66


**Goal**

Get friendlier formats for `ColumnDataSource`.

In [None]:
def unit_df():
    """Placeholder for a helper that is not written yet.

    The original cell held only a bare ``def unit_df()`` with no colon and no
    body, which is a syntax error and breaks a full run of the notebook.

    NOTE(review): presumably this is meant to produce the friendlier
    ``ColumnDataSource`` format named in the goal above -- confirm the
    intended signature and return value before implementing.

    Raises:
        NotImplementedError: always, until the helper is implemented.
    """
    raise NotImplementedError("unit_df is not implemented yet.")

In [97]:
# Long format: one (variable, value) row per cell of the transposed frame.
pd.melt(molar_df.transpose())

Unnamed: 0,variable,value
0,"((Al(III), 1.0),)",0.005
1,"((Al(III), 1.0),)",0.005
2,"((Al(III), 1.0),)",0.005
3,"((Al(III), 1.0),)",0.005
4,"((Al(III), 1.0),)",0.005
5,"((Fake, 2.0), (Fake, 1.0))",0.006
6,"((Fake, 2.0), (Fake, 1.0))",0.006
7,"((Fake, 2.0), (Fake, 1.0))",0.006
8,"((Fake, 2.0), (Fake, 1.0))",0.006
9,"((Fake, 2.0), (Fake, 1.0))",0.006


In [98]:
# Long format: one (variable, value) row per cell of the transposed frame.
pd.melt(ppm_df.transpose())

Unnamed: 0,variable,value
0,"((Al(III), 1.0),)",79.9
1,"((Al(III), 1.0),)",79.84
2,"((Al(III), 1.0),)",79.72
3,"((Al(III), 1.0),)",79.66
4,"((Al(III), 1.0),)",79.66
5,"((Fake, 2.0), (Fake, 1.0))",79.9
6,"((Fake, 2.0), (Fake, 1.0))",79.84
7,"((Fake, 2.0), (Fake, 1.0))",79.72
8,"((Fake, 2.0), (Fake, 1.0))",79.66
9,"((Fake, 2.0), (Fake, 1.0))",79.66


### Groupby

TODO...

In [78]:
# Groupby examples

---

# Bokeh Model

https://bokeh.pydata.org/en/latest/docs/reference/core/properties.html#container-properties

**Goal**

The information in the indexes provided by `isadream` is too complicated to easily manipulate
within Bokeh. (The indexes do not load into `ColumnDataSource`, and even if they did, there is no straightforward way to access that index data.)

Bokeh provides some base classes for this.