# Visualization Design

By: *Tyler Biggs*

---

**Overview**

This notebook will go over the design of the vizualizations. It should also serve as a reference for future custom visualizations.

In [1]:
%load_ext autoreload
%autoreload 2
from pprint import pprint

In [2]:
import pandas as pd
import numpy as np
import re
import collections
import itertools
import functools

import bokeh as bk
import bokeh.io
import bokeh.models
import bokeh.layouts
import bokeh.plotting
bokeh.io.output_notebook()

# import holoviews as hv
# hv.extension('bokeh')

In [3]:
# Path hack to allow imports from the parent directory.
import sys, os
sys.path.insert(0, os.path.abspath('../../'))

In [92]:
from isadream.isadream.models import utils
from isadream.isadream import io

In [99]:
ll = [1, 2, 3, 4]

for index, x in enumerate(ll):
    print(index, x)
    x = 2

0 1
1 2
2 3
3 4


---

## Dataflow

The data is transfered from the Drupal server as a `.json` file. Those files placed into a directory as the user requests them. That is, all the datasets that a user selects for any given visualziation are placed in a directory. These files are condensed into four dataframes per `.json` file.

In [5]:
# A demo json file is provided.
nmr_json_demo = utils.SIPOS_DEMO
demo_base_path = utils.BASE_PATH
print(nmr_json_demo, '\n', demo_base_path)

/home/tylerbiggs/git/isadream/isadream/demo_data/demo_json/sipos_2006_talanta_nmr_figs.json 
 /home/tylerbiggs/git/isadream/isadream/demo_data/


In [6]:
demo_json = io.read_idream_json(nmr_json_demo)
node = io.parse_json(demo_json)

---

## Viewing the data in each Assay (datafile) per .json

In [7]:
node_assays = [a for a in node.assays]

---

### Getting Subsets

In [8]:
def normalize(value, size):
    return tuple(value for _ in range(size))

In [9]:
print([(x, y)
       for x in range(2)
       for y in range(10)])

[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), (0, 8), (0, 9), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9)]


In [136]:
def group_factors(node, group):
    
    label, unit, species = group
    
    out_factors = list()
    
    for factor in node.factors:
        if factor.query(unit):
            out_factors.append((label, factor, species))
            
    return out_factors
    

def group_samples(node, group):
    
    label, unit, species = group
    
    out_samples =  list()
    
    if unit == 'Species':
        query = utils.query_node_species
    else:
        query = utils.query_node_factors
        
    for sample in node.samples:
        if query(sample, unit):
            out_samples.append((label, sample))
                
    return out_samples


def build_factor_data(assay, factor):
    if factor.is_csv_index:
        data = assay.datafile_dict.get(factor.csv_index)
    else:
        data = normalize(factor.dict_value, size=assay.factor_size)
    return data


def build_cds(assays, x_groups, y_groups):
    """
    groups: (label, unit, {species})
    """
    cds = collections.defaultdict(list)
    
    for assay in assays:
        
        _normalize = functools.partial(normalize, size=assay.factor_size)
        
        # Each group iteration will be one set of row additions to the group keys.
        for xg, yg in itertools.zip_longest(x_groups, y_groups):
            
            if xg is not None:
                
                group_label, group_unit, group_species = xg
                
                if group_unit == 'Species':
                    pass
                
                else:
                for _, sample in group_samples(assay, xg):
                    sample_factors = group_factors(sample, xg) + group_factors(assay, xg)
#                     print(sample_factors)
                    for group_label, factor, group_species in sample_factors:
                        
                        data = build_factor_data(assay, factor)
                        cds[group_label].append(data)
                        
            if yg is not None:
                for _, sample in group_samples(assay, yg):

                    sample_factors = group_factors(sample, yg) + group_factors(assay, yg)

                    for group_label, factor, group_species in sample_factors:
                        data = build_factor_data(assay, factor)
                        cds[group_label].append(data)

        return cds

In [137]:
y_groups = (
    ('27 Al ppm', 'ppm', ("Al",)),
)

x_groups = (
    ('Total Aluminate Concentration', 'Molar', ("Al", )),
    ('Counter Ion Concentration', 'Molar', ("Na+", "Li+", "Cs+", "K+", )),
    ('Counter Ion', 'Species', ("Na+", "Li+", "Cs+", "K+", )),
    ('Base Concentration', 'Molar', ("OH-", )),
)

demo_json = io.read_idream_json(nmr_json_demo)
node = io.parse_json(demo_json)

build_cds(assays=node.assays, x_groups=x_groups, y_groups=y_groups)

('Total Aluminate Concentration', 'Molar', ('Al',))
('Counter Ion Concentration', 'Molar', ('Na+', 'Li+', 'Cs+', 'K+'))
('Counter Ion', 'Species', ('Na+', 'Li+', 'Cs+', 'K+'))
('Base Concentration', 'Molar', ('OH-',))


defaultdict(list,
            {'Total Aluminate Concentration': [(2.93, 4.92, 6.85, 9.13, 10.71),
              (0.005, 0.005, 0.005, 0.005, 0.005)],
             'Counter Ion Concentration': [(2.93, 4.92, 6.85, 9.13, 10.71),
              (0.005, 0.005, 0.005, 0.005, 0.005)],
             'Base Concentration': [(2.93, 4.92, 6.85, 9.13, 10.71),
              (0.005, 0.005, 0.005, 0.005, 0.005)]})

In [87]:
pd.DataFrame(build_cds(assays=node.assays, x_groups=x_groups, y_groups=y_groups))

Unnamed: 0,Total Aluminate Concentration,Counter Ion Concentration,Base Concentration
0,"(2.93, 4.92, 6.85, 9.13, 10.71)","(2.93, 4.92, 6.85, 9.13, 10.71)","(2.93, 4.92, 6.85, 9.13, 10.71)"
1,"(0.005, 0.005, 0.005, 0.005, 0.005)","(0.005, 0.005, 0.005, 0.005, 0.005)","(0.005, 0.005, 0.005, 0.005, 0.005)"


In [95]:
# def build_cds(assays, experiment, species=None):
#     """
    
#     :param assays: A list of isadream.models.containers.AssayNode objects.
#     :param experiment: A string value referencing a factor type. This can
#         reference a factor type, a reference value or a unit reference.
#     :param species: A list of species that will be used to filter the
#         results. Only values with a match of the given species will be
#         added to the output dictionary.
        
#     column_data_source = {
#         (factor_tuple) = [data],
#         (species_tuple) = [species],
#         (sample_hash) = [hash(sample)],
#         (assay_hash) = [hash(assay)]
#     }
        
#     """
    
#     basic_keys = ['sample_hash', 'assay_hash']
#     all_factors = list(itertools.chain.from_iterable([assay.all_factors for assay in assays]))
#     cds = pd.DataFrame(columns=basic_keys)
#     cds = cds.set_index(basic_keys)
    
#     main_frames = []
    
#     def normalize(value, size):
#         return [value for _ in range(size)]
    
#     for assay in assays:
        
        
        
        
# #         assay_frames = list()
        
# #         # Get this assay nodes hash.
# #         assay_hash = str(hash(assay))
        
# #         # Redefine normalize() with a new default value.
# #         _normalize = functools.partial(normalize, size=assay.factor_size)
        
# #         for sample in assay.all_samples:
             
# #             # Get the (unique) csv data associated with this sample.
# #             sample_csv_factors = (set(sample.all_factors) | set(all_factors)) \
# #                                  & set(assay.csv_index_factors)
# # #             sample_csv_factors = sample.all_factors + all_factors + assay.csv_index_factors
            
# #             # Get all the (unique) factors of this sample that are not csv indexes.
# #             sample_factors = set(sample.all_factors) - set(sample_csv_factors)
            
# #             # Get all the species associated with this sample.
# #             sample_species = sample.all_species
# #             sample_species = [(s.dict_label, s.dict_value) for s in sample_species]
            
# #             # Get this samples hash.
# #             sample_hash = str(hash(sample)),
            
# #             # Add the data from the .csv files.
# #             df = pd.DataFrame()
# # #             df['sample_hash'] = _normalize(sample_hash)
# # #             df['assay_hash'] = _normalize(assay_hash)
# # #             df = df.set_index(basic_keys)
            
# #             species_maps = [{s.dict_label: s.dict_value} for s in sample.all_species]
# #             species_chain = collections.ChainMap(species_maps)
            
# #             species_tuple = tuple()
# #             for species_list in species_chain.maps:
# #                 for species in species_list:
# #                     for s_ref, stoich in species.items():
# #                         species_tuple += (s_ref, stoich),
            
# #             for sample_csv_factor in sample_csv_factors:
            
# #                 # Ensure the csv index is a string.
# #                 csv_key = str(sample_csv_factor.csv_index)
# #                 factor_label = sample_csv_factor.dict_label
# #                 data = assay.datafile_dict.get(csv_key)
# #                 print(data)
# #                 key = factor_label + species_tuple
# #                 df[key] = data

# #                 # Add the data from the remaining assay and sample factors.
# # #             for sample_factor in sample_factors:

# # #                 # Build the data dictionary to append to the dataframe.
# # #                 factor_label = sample_factor.dict_label
# # #                 data = _normalize(sample_factor.dict_value)
# # #                 print(data)
# # #                 key = factor_label + species_tuple
# # #                 df[key] = data
                    
# #             assay_frames.append(df)
# #             display(df)
# #             break
        
# #     assay_df = pd.concat(assay_frames)
# #     display(assay_df)
# #     main_frames.append(assay_df)
# #     display(main_frames)
# #     main_df = pd.concat(main_frames)
# #     display(main_df)
# #     return cds

In [197]:
build_cds(node_assays, experiment=("ppm", "27_Al"))

[79.9, 79.84, 79.72, 79.66, 79.66]
[2.93, 4.92, 6.85, 9.13, 10.71]


Unnamed: 0,"(Measurement, 27_Al, ppm, (OH-, 1.0), (K+, 1.0))","(Measurement Condition, Molar, (OH-, 1.0), (K+, 1.0))"
0,79.9,2.93
1,79.84,4.92
2,79.72,6.85
3,79.66,9.13
4,79.66,10.71


[79.9, 79.84, 79.72, 79.66, 79.66]
[2.93, 4.92, 6.85, 9.13, 10.71]


Unnamed: 0,"(Measurement, 27_Al, ppm, (Al(III), 1.0), (Al(III), 1.0))","(Measurement Condition, Molar, (Al(III), 1.0), (Al(III), 1.0))"
0,79.9,2.93
1,79.84,4.92
2,79.72,6.85
3,79.66,9.13
4,79.66,10.71


[79.9, 79.84, 79.72, 79.66, 79.66]
[2.93, 4.92, 6.85, 9.13, 10.71]


Unnamed: 0,"(Measurement, 27_Al, ppm, (Fake, 1.0), (Fake, 2.0))","(Measurement Condition, Molar, (Fake, 1.0), (Fake, 2.0))"
0,79.9,2.93
1,79.84,4.92
2,79.72,6.85
3,79.66,9.13
4,79.66,10.71


[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78.83, 78.47]
[0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59, 4.11]


Unnamed: 0,"(Measurement, 27_Al, ppm, (OH-, 1.0), (Li+, 1.0))","(Measurement Condition, Molar, (OH-, 1.0), (Li+, 1.0))"
0,79.92,0.66
1,79.84,1.1
2,79.66,1.64
3,79.54,2.14
4,79.36,2.59
5,79.13,3.11
6,78.83,3.59
7,78.47,4.11


[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78.83, 78.47]
[0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59, 4.11]


Unnamed: 0,"(Measurement, 27_Al, ppm, (Al(III), 1.0), (Al(III), 1.0))","(Measurement Condition, Molar, (Al(III), 1.0), (Al(III), 1.0))"
0,79.92,0.66
1,79.84,1.1
2,79.66,1.64
3,79.54,2.14
4,79.36,2.59
5,79.13,3.11
6,78.83,3.59
7,78.47,4.11


[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78.83, 78.47]
[0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59, 4.11]


Unnamed: 0,"(Measurement, 27_Al, ppm, (Fake, 1.0), (Fake, 2.0))","(Measurement Condition, Molar, (Fake, 1.0), (Fake, 2.0))"
0,79.92,0.66
1,79.84,1.1
2,79.66,1.64
3,79.54,2.14
4,79.36,2.59
5,79.13,3.11
6,78.83,3.59
7,78.47,4.11


[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74.53, 73.14, 71.32]
[4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 14.92, 16.97, 18.92]


Unnamed: 0,"(Measurement, 27_Al, ppm, (Na+, 1.0), (OH-, 1.0))","(Measurement Condition, Molar, (Na+, 1.0), (OH-, 1.0))"
0,79.54,4.98
1,79.98,0.93
2,79.13,6.97
3,78.45,8.96
4,77.67,10.95
5,76.47,12.99
6,74.53,14.92
7,73.14,16.97
8,71.32,18.92


[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74.53, 73.14, 71.32]
[4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 14.92, 16.97, 18.92]


Unnamed: 0,"(Measurement, 27_Al, ppm, (Al(III), 1.0), (Al(III), 1.0))","(Measurement Condition, Molar, (Al(III), 1.0), (Al(III), 1.0))"
0,79.54,4.98
1,79.98,0.93
2,79.13,6.97
3,78.45,8.96
4,77.67,10.95
5,76.47,12.99
6,74.53,14.92
7,73.14,16.97
8,71.32,18.92


[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74.53, 73.14, 71.32]
[4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 14.92, 16.97, 18.92]


Unnamed: 0,"(Measurement, 27_Al, ppm, (Fake, 1.0), (Fake, 2.0))","(Measurement Condition, Molar, (Fake, 1.0), (Fake, 2.0))"
0,79.54,4.98
1,79.98,0.93
2,79.13,6.97
3,78.45,8.96
4,77.67,10.95
5,76.47,12.99
6,74.53,14.92
7,73.14,16.97
8,71.32,18.92


In [52]:
for na in node_assays:
    pprint(na)
    break

<isadream.isadream.models.nodal.AssayNode object at 0x7f5826b0d588>


In [53]:
for na in node_assays:
    display(na.as_dict)
    df = pd.DataFrame(na)
    df.columns = pd.MultiIndex.from_tuples(df.columns)
    display(df)
#     df

{("('K+', 1.0)__('OH-', 1.0)", 'Measurement', '27_Al', 'ppm'): [79.9,
  79.84,
  79.72,
  79.66,
  79.66],
 ("('K+', 1.0)__('OH-', 1.0)", 'Measurement Condition', 'Molar'): [2.93,
  4.92,
  6.85,
  9.13,
  10.71],
 ("('Al(III)', 1.0)", 'Measurement', '27_Al', 'ppm'): [79.9,
  79.84,
  79.72,
  79.66,
  79.66],
 ("('Al(III)', 1.0)", 'Measurement Condition', 'Molar'): [0.005,
  0.005,
  0.005,
  0.005,
  0.005],
 ("('Al(III)', 1.0)",
  'Material Property',
  'Purity by Weight',
  'Percent'): [0.98, 0.98, 0.98, 0.98, 0.98],
 ("('Fake', 2.0)__('Fake', 1.0)", 'Measurement', '27_Al', 'ppm'): [79.9,
  79.84,
  79.72,
  79.66,
  79.66],
 ("('Fake', 2.0)__('Fake', 1.0)", 'Measurement Condition', 'Molar'): [0.006,
  0.006,
  0.006,
  0.006,
  0.006],
 ("('Fake', 2.0)__('Fake', 1.0)",
  'Material Property',
  'Density',
  'g/cm^3'): [1.05, 1.05, 1.05, 1.05, 1.05],
 ("('Fake', 2.0)__('Fake', 1.0)",
  'Material Property',
  'Poor',
  'Quality'): ['Poor', 'Poor', 'Poor', 'Poor', 'Poor']}

ValueError: DataFrame constructor not properly called!

In [68]:
assay_chain_map = collections.ChainMap(*[a.as_dict for a in node.assays]).maps
df = pd.DataFrame.from_records(assay_chain_map)
df.columns = pd.MultiIndex.from_tuples(df.columns)
# df.apply(lambda x: len(x))
df

Unnamed: 0_level_0,"('Al(III)', 1.0)","('Al(III)', 1.0)","('Al(III)', 1.0)","('Fake', 1.0)__('Fake', 2.0)","('Fake', 1.0)__('Fake', 2.0)","('Fake', 1.0)__('Fake', 2.0)","('Fake', 1.0)__('Fake', 2.0)","('K+', 1.0)__('OH-', 1.0)","('K+', 1.0)__('OH-', 1.0)","('Na+', 1.0)__('OH-', 1.0)","('Na+', 1.0)__('OH-', 1.0)","('OH-', 1.0)__('Li+', 1.0)","('OH-', 1.0)__('Li+', 1.0)"
Unnamed: 0_level_1,Material Property,Measurement,Measurement Condition,Material Property,Material Property,Measurement,Measurement Condition,Measurement,Measurement Condition,Measurement,Measurement Condition,Measurement,Measurement Condition
Unnamed: 0_level_2,Percent,ppm,Molar,Quality,g/cm^3,ppm,Molar,ppm,Molar,ppm,Molar,ppm,Molar
0,"[0.98, 0.98, 0.98, 0.98, 0.98]","[79.9, 79.84, 79.72, 79.66, 79.66]","[0.005, 0.005, 0.005, 0.005, 0.005]","[Poor, Poor, Poor, Poor, Poor]","[1.05, 1.05, 1.05, 1.05, 1.05]","[79.9, 79.84, 79.72, 79.66, 79.66]","[0.006, 0.006, 0.006, 0.006, 0.006]","[79.9, 79.84, 79.72, 79.66, 79.66]","[2.93, 4.92, 6.85, 9.13, 10.71]",,,,
1,"[0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98]","[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78....","[0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.0...","[Poor, Poor, Poor, Poor, Poor, Poor, Poor, Poor]","[1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05]","[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78....","[0.006, 0.006, 0.006, 0.006, 0.006, 0.006, 0.0...",,,,,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78....","[0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59, 4.11]"
2,"[0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.9...","[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74....","[0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.0...","[Poor, Poor, Poor, Poor, Poor, Poor, Poor, Poo...","[1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.05, 1.0...","[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74....","[0.006, 0.006, 0.006, 0.006, 0.006, 0.006, 0.0...",,,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74....","[4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 14.92, ...",,


In [66]:
def

for col in df.columns:
#     display(df[col].values)
    if 
    vector_length = max((v) for v in df[col].values)
    print(vector_length)
#     for v in df[col].values:
#         print(type(v))

AttributeError: 'list' object has no attribute 'max'

In [96]:
f_key = 'ppm'
val_cols = [k for k in df.columns if f_key in k]
key_cols = [k for k in df.columns if k not in val_cols]
# pprint(val_cols)
# pprint(key_cols)

In [101]:
assay_chain_map = collections.ChainMap(*[a.as_dict for a in node.assays]).maps
df = pd.DataFrame.from_records(assay_chain_map)

df = df.reindex(val_cols, axis=1).T
df = pd.DataFrame(df.stack())
df
# df.columns = pd.MultiIndex.from_tuples(df.columns)

Unnamed: 0,Unnamed: 1,0
"(('Al(III)', 1.0), Measurement, ppm)",0,"[79.9, 79.84, 79.72, 79.66, 79.66]"
"(('Al(III)', 1.0), Measurement, ppm)",1,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78...."
"(('Al(III)', 1.0), Measurement, ppm)",2,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74...."
"(('Fake', 1.0)__('Fake', 2.0), Measurement, ppm)",0,"[79.9, 79.84, 79.72, 79.66, 79.66]"
"(('Fake', 1.0)__('Fake', 2.0), Measurement, ppm)",1,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78...."
"(('Fake', 1.0)__('Fake', 2.0), Measurement, ppm)",2,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74...."
"(('K+', 1.0)__('OH-', 1.0), Measurement, ppm)",0,"[79.9, 79.84, 79.72, 79.66, 79.66]"
"(('Na+', 1.0)__('OH-', 1.0), Measurement, ppm)",2,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74...."
"(('OH-', 1.0)__('Li+', 1.0), Measurement, ppm)",1,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78...."


In [102]:
assay_chain_map = collections.ChainMap(*[a.as_dict for a in node.assays]).maps
df = pd.DataFrame.from_records(assay_chain_map)

df = df.reindex(key_cols, axis=1).T
df = pd.DataFrame(df.stack())
df

Unnamed: 0,Unnamed: 1,0
"(('Al(III)', 1.0), Material Property, Percent)",0,"[0.98, 0.98, 0.98, 0.98, 0.98]"
"(('Al(III)', 1.0), Material Property, Percent)",1,"[0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98]"
"(('Al(III)', 1.0), Material Property, Percent)",2,"[0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.98, 0.9..."
"(('Al(III)', 1.0), Measurement Condition, Molar)",0,"[0.005, 0.005, 0.005, 0.005, 0.005]"
"(('Al(III)', 1.0), Measurement Condition, Molar)",1,"[0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.0..."
"(('Al(III)', 1.0), Measurement Condition, Molar)",2,"[0.005, 0.005, 0.005, 0.005, 0.005, 0.005, 0.0..."
"(('Fake', 1.0)__('Fake', 2.0), Material Property, Quality)",0,"[Poor, Poor, Poor, Poor, Poor]"
"(('Fake', 1.0)__('Fake', 2.0), Material Property, Quality)",1,"[Poor, Poor, Poor, Poor, Poor, Poor, Poor, Poor]"
"(('Fake', 1.0)__('Fake', 2.0), Material Property, Quality)",2,"[Poor, Poor, Poor, Poor, Poor, Poor, Poor, Poo..."
"(('Fake', 1.0)__('Fake', 2.0), Material Property, g/cm^3)",0,"[1.05, 1.05, 1.05, 1.05, 1.05]"


In [99]:
# cds = {}
# for samp in bkdf:
#     key = '__'.join(str(x) for x in samp)
#     cds[key] = np.concatenate(bkdf[samp].values)
    
# cds

In [93]:
def fill_nan(array):
    """Assumes that all vectors in the given array shoulde be of the same length.
    Fills NaN values with NaN arrays of the vector length."""
    vector_length = max(len(v) for v in array)
    for a in array:
        try:
            print(len(a))
        except TypeError as error:
            pass
        
def to_cds(query='ppm', drupal_node=node):
    cds = {}
    
    assay_chain_map = collections.ChainMap(*[a.as_dict for a in node.assays]).maps
    df = pd.DataFrame.from_records(assay_chain_map)
    df.columns = pd.MultiIndex.from_tuples(df.columns)
#     df = df.dropna(axis=1)
    
    val_cols = [k for k in df.columns if query in k]
    key_cols = [k for k in df.columns if k not in val_cols]
    
    tdf = df.reindex(val_cols, axis=1).T
    display(pd.DataFrame(tdf.stack()))
    for samp in tdf:
        display(tdf.T)
        display(samp)
        key = 'val_dim__' +  '__'.join([str(x) for x in samp])
        for col in tdf:
            vectors = [v for v in tdf[col].values if v]
            fill_nan(vectors)
#             display(vectors)
            cds[key] = np.concatenate([v for v in tdf[col].values if v])

#     tdf = df.reindex(key_cols, axis=1)      
#     for samp in tdf:
# #         display(tdf)
# #         display(samp)
#         key = 'key_dim__' +  '__'.join([str(x) for x in samp])
        
#         for col in tdf:
#             cds[key] = np.concatenate([v for v in tdf[col].values if v])
        
    
    return cds

In [94]:
node_cds = to_cds()
node_cds

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,0
"('Al(III)', 1.0)",Measurement,ppm,0,"[79.9, 79.84, 79.72, 79.66, 79.66]"
"('Al(III)', 1.0)",Measurement,ppm,1,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78...."
"('Al(III)', 1.0)",Measurement,ppm,2,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74...."
"('Fake', 1.0)__('Fake', 2.0)",Measurement,ppm,0,"[79.9, 79.84, 79.72, 79.66, 79.66]"
"('Fake', 1.0)__('Fake', 2.0)",Measurement,ppm,1,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78...."
"('Fake', 1.0)__('Fake', 2.0)",Measurement,ppm,2,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74...."
"('K+', 1.0)__('OH-', 1.0)",Measurement,ppm,0,"[79.9, 79.84, 79.72, 79.66, 79.66]"
"('Na+', 1.0)__('OH-', 1.0)",Measurement,ppm,2,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74...."
"('OH-', 1.0)__('Li+', 1.0)",Measurement,ppm,1,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78...."


Unnamed: 0_level_0,"('Al(III)', 1.0)","('Fake', 1.0)__('Fake', 2.0)","('K+', 1.0)__('OH-', 1.0)","('Na+', 1.0)__('OH-', 1.0)","('OH-', 1.0)__('Li+', 1.0)"
Unnamed: 0_level_1,Measurement,Measurement,Measurement,Measurement,Measurement
Unnamed: 0_level_2,ppm,ppm,ppm,ppm,ppm
0,"[79.9, 79.84, 79.72, 79.66, 79.66]","[79.9, 79.84, 79.72, 79.66, 79.66]","[79.9, 79.84, 79.72, 79.66, 79.66]",,
1,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78....","[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78....",,,"[79.92, 79.84, 79.66, 79.54, 79.36, 79.13, 78...."
2,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74....","[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74....",,"[79.54, 79.98, 79.13, 78.45, 77.67, 76.47, 74....",


0

TypeError: 'int' object is not iterable

In [258]:
keys = [k for k in node_cds.keys()]
keys

["val_dim__Measurement__ppm__('Al(III)', 1.0)",
 "val_dim__Measurement__ppm__('Fake', 1.0)",
 "key_dim__Material Property__Percent__('Al(III)', 1.0)",
 "key_dim__Material Property__Quality__('Fake', 1.0)",
 "key_dim__Material Property__g/cm^3__('Fake', 1.0)",
 "key_dim__Measurement Condition__Molar__('Al(III)', 1.0)",
 "key_dim__Measurement Condition__Molar__('Fake', 1.0)"]

In [262]:
fig = bk.plotting.figure()
fig.circle(
    y="val_dim__Measurement__ppm__('Al(III)', 1.0)",
    x="key_dim__Measurement Condition__Molar__('Al(III)', 1.0)",
    source=bk.models.ColumnDataSource(to_cds())
)
bk.plotting.show(fig)

In [52]:
node_dict = dict()
for assay in node.assays:
    node_dict = dict(**node_dict, **assay.as_dict)

node_dict

TypeError: type object got multiple values for keyword argument '-9223363289947229531.8746907551759'

In [9]:
molar_df = df.xs(('Measurement_Condition', 'Molar'))
molar_df

Unnamed: 0,0,1,2,3,4
"((Al(III), 1.0),)",0.005,0.005,0.005,0.005,0.005
"((Fake, 2.0), (Fake, 1.0))",0.006,0.006,0.006,0.006,0.006
"((K+, 1.0), (OH-, 1.0))",2.93,4.92,6.85,9.13,10.71


In [10]:
ppm_df = df.xs(('Measurement', 'ppm'))
ppm_df

Unnamed: 0,0,1,2,3,4
"((Al(III), 1.0),)",79.9,79.84,79.72,79.66,79.66
"((Fake, 2.0), (Fake, 1.0))",79.9,79.84,79.72,79.66,79.66
"((K+, 1.0), (OH-, 1.0))",79.9,79.84,79.72,79.66,79.66


**Goal**

Get friendlier formats for `ColumnDataSource`.

In [11]:
def build_array(factor, assay):
    
    assay_df = pd.DataFrame.from_records(assay.column_data_source)
    assay_df.columns = pd.MultiIndex.from_tuples(assay_df.columns)
    assay_df = assay_df.T
    
    factor_df = assay_df.xs(factor)
    factor_df = factor_df.T.melt(var_name='species', value_name=str(factor))
    factor_df = factor_df.set_index('species')
    
    return factor_df

In [12]:
# for assay in node.assays:
#     display(build_array(('Measurement', 'ppm'), assay))

In [13]:
# for assay in node.assays:
#     display(build_array(('Measurement_Condition', 'Molar'), assay))

### Grouby

TODO...

In [14]:
# Groupby examples

---

# Bokeh Model

https://bokeh.pydata.org/en/latest/docs/reference/core/properties.html#container-properties

In [None]:
layout = []

for assay in node.assays:
    xs = build_array(('Measurement_Condition', 'Molar'), assay)
    
    ys = build_array(('Measurement', 'ppm'), assay)
    
    layout.append(hv.Scatter((xs, ys)))
    
hv.Layout(layout)