# Visualization Design

By: *Tyler Biggs*

---

**Overview**

This notebook goes over the design of the visualizations. It should also serve as a reference for future custom visualizations.

In [1]:
%load_ext autoreload
%autoreload 2
from pprint import pprint

In [2]:
import pandas as pd
import numpy as np
import re
import os
import collections
import itertools
import functools

import bokeh as bk
import bokeh.io
import bokeh.models
import bokeh.layouts
import bokeh.plotting
bokeh.io.output_notebook()

# import holoviews as hv
# hv.extension('bokeh')

In [3]:
# Path hack to allow imports from the parent directory.
import sys, os
# sys.path.insert(0, os.path.abspath('../'))

In [4]:
# sys.path

In [5]:
from isadream.models import utils
from isadream import io
from isadream.models import nodal, elemental

---

## Dataflow

The data is transferred from the Drupal server as `.json` files. Those files are placed into a directory as the user requests them. That is, all the datasets that a user selects for any given visualization are placed in a directory. These files are condensed into four dataframes per `.json` file.

In [6]:
# A demo json file is provided.
from isadream import config, demos, demo_path

In [7]:
# Display the base path that the demo file names are joined onto below.
demo_path

'/home/tyler/git/isadream'

---

In [8]:
# Resolve the bundled Sipos NMR demo file, read it with io.read_idream_json,
# and hand the result to io.parse_node_json.
sipos_nmr_path = os.path.join(demo_path, demos["SIPOS_NMR"])
demo_json = io.read_idream_json(sipos_nmr_path)
node = io.parse_node_json(demo_json)

In [9]:
# node

In [10]:
# Inspect the first sample of the first assay as (parameter name, value) pairs.
node.assays[0].samples[0].get_param_values()

[('comments', []),
 ('factors',
  [Factor(csv_column_index=0, decimal_value=0.0, factor_type='Measurement Condition', name='Factor00014', reference_value='', string_value='', unit_reference='Molar')]),
 ('name', 'SampleNode00017'),
 ('sample_name', 'Potassium Hydroxide'),
 ('sources', []),
 ('species',
  [SpeciesFactor(name='SpeciesFactor00015', species_reference='K+', stoichiometry=1.0),
   SpeciesFactor(name='SpeciesFactor00016', species_reference='OH-', stoichiometry=1.0)])]

In [11]:
# help(node.assays[0].samples[0])

In [12]:
# help(node)

In [13]:
# Load and parse the Sipos NMR demo file.
demo_json = io.read_idream_json(os.path.join(demo_path, demos["SIPOS_NMR"]))
node = io.parse_node_json(demo_json)

# Each group is a 3-tuple: (output column label, unit references, species references).
# NOTE(review): how units and species are matched is decided inside
# io.build_node_data -- confirm the exact rules there.
x_groups = (
    ('Total Aluminate Concentration', ('Molar',), ("Al",)),
    ('Counter Ion Concentration', ('Molar',), ("Na+", "Li+", "Cs+", "K+")),
    ('Counter Ion', ('Species',), ("Na+", "Li+", "Cs+", "K+")),
    ('Base Concentration', ('Molar',), ("OH-",)),
)
y_groups = (
    ('27 Al ppm', ('ppm',), ("Al",)),
)

test_groups = x_groups + y_groups

# One (column data, metadata) pair per assay; the metadata half is kept in
# node_assays and only the column data is turned into a dataframe here.
node_assays = [io.build_node_data(assay, test_groups) for assay in node.assays]
dfs = [pd.DataFrame(column_data) for column_data, _ in node_assays]

# The frames are stacked as-is, so the row index restarts at 0 for every assay.
df = pd.concat(dfs)
df

Unnamed: 0,parent_node,assay_node,Total Aluminate Concentration,sample_node,Counter Ion Concentration,Counter Ion,Base Concentration,27 Al ppm
0,0be3e6bf-256f-4bdd-916b-bdd5cdfb8161,7827b31a-4f58-4091-b4b3-598561f9dfbf,0.005,a1db6aeb-c628-4fe1-8733-623334ac8ca0,2.93,K+,2.93,79.9
1,0be3e6bf-256f-4bdd-916b-bdd5cdfb8161,7827b31a-4f58-4091-b4b3-598561f9dfbf,0.005,a1db6aeb-c628-4fe1-8733-623334ac8ca0,4.92,K+,4.92,79.84
2,0be3e6bf-256f-4bdd-916b-bdd5cdfb8161,7827b31a-4f58-4091-b4b3-598561f9dfbf,0.005,a1db6aeb-c628-4fe1-8733-623334ac8ca0,6.85,K+,6.85,79.72
3,0be3e6bf-256f-4bdd-916b-bdd5cdfb8161,7827b31a-4f58-4091-b4b3-598561f9dfbf,0.005,a1db6aeb-c628-4fe1-8733-623334ac8ca0,9.13,K+,9.13,79.66
4,0be3e6bf-256f-4bdd-916b-bdd5cdfb8161,7827b31a-4f58-4091-b4b3-598561f9dfbf,0.005,a1db6aeb-c628-4fe1-8733-623334ac8ca0,10.71,K+,10.71,79.66
0,31d30d90-d7d2-4c6e-a621-edb0d81873d3,43cb64f6-84b2-4a80-9cd7-17c12d676fd9,0.005,4581d8df-a69f-4d82-9bc5-ac0ba116da38,0.66,Li+,0.66,79.92
1,31d30d90-d7d2-4c6e-a621-edb0d81873d3,43cb64f6-84b2-4a80-9cd7-17c12d676fd9,0.005,4581d8df-a69f-4d82-9bc5-ac0ba116da38,1.1,Li+,1.1,79.84
2,31d30d90-d7d2-4c6e-a621-edb0d81873d3,43cb64f6-84b2-4a80-9cd7-17c12d676fd9,0.005,4581d8df-a69f-4d82-9bc5-ac0ba116da38,1.64,Li+,1.64,79.66
3,31d30d90-d7d2-4c6e-a621-edb0d81873d3,43cb64f6-84b2-4a80-9cd7-17c12d676fd9,0.005,4581d8df-a69f-4d82-9bc5-ac0ba116da38,2.14,Li+,2.14,79.54
4,31d30d90-d7d2-4c6e-a621-edb0d81873d3,43cb64f6-84b2-4a80-9cd7-17c12d676fd9,0.005,4581d8df-a69f-4d82-9bc5-ac0ba116da38,2.59,Li+,2.59,79.36


In [19]:
# Reference lookup: print the signature and docstring of bokeh's factor_cmap.
# NOTE(review): bokeh.transform is not imported explicitly in the visible import
# cell; presumably it is loaded as a side effect of another bokeh import -- confirm.
help(bk.transform.factor_cmap)

Help on function factor_cmap in module bokeh.transform:

factor_cmap(field_name, palette, factors, start=0, end=None, nan_color='gray')
    Create a ``DataSpec`` dict to apply a client-side
    ``CategoricalColorMapper`` transformation to a ``ColumnDataSource``
    column.
    
    Args:
        field_name (str) : a field name to configure ``DataSpec`` with
    
        palette (seq[color]) : a list of colors to use for colormapping
    
        factors (seq) : a sequences of categorical factors corresponding to
            the palette
    
        start (int, optional) : a start slice index to apply when the column
            data has factors with multiple levels. (default: 0)
    
        end (int, optional) : an end slice index to apply when the column
            data has factors with multiple levels. (default: None)
    
        nan_color (color, optional) : a default color to use when mapping data
            from a column does not succeed (default: "gray")
    
    Returns:
   

In [15]:
# Keep the second item of every pair in node_assays (the per-assay metadata).
metadata = [assay_metadata for _, assay_metadata in node_assays]

# Expose the per-assay mappings as a single lookup. A ChainMap searches its maps
# in order, so on a duplicate key the entry from the earliest assay is returned.
md = collections.ChainMap(*metadata)
md

ChainMap({'01aa716c-34f1-48df-bfa7-c313f79af049': (None, [Comment(body='I manually pulled this points out with a web tool.', comment_title='Study level comment on Sipos 2006.', name='Comment00062')]), 'd530ea6c-1251-4446-a659-5fab4ebfa7ef': ('Potassium Hydroxide Assay', [Comment(body='I manually pulled this points out with a web tool.', comment_title='Data extraction method.', name='Comment00069')]), 'ab3b7c12-8873-4bf8-9293-7aa5461c629a': SampleNode(comments=[], factors=[Factor(csv_column_index=0, decimal_value=0.005, factor_type='Measurement Condition', name='Factor00063', reference_value='', string_value='', unit_reference='Molar')], name='SampleNode00068', sample_name='Aluminate Solution', sources=[SourceNode(comments=[], factors=[Factor(csv_column_index=0, decimal_value=0.98, factor_type='Material Property', name='Factor00065', reference_value='Purity by Weight', string_value='', unit_reference='Percent')], name='SourceNode00067', source_name='Aluminum Wire', species=[SpeciesFacto

---