# Visualization Design

By: *Tyler Biggs*

---

**Overview**

This notebook will go over the design of the vizualizations. It should also serve as a reference for future custom visualizations.

In [1]:
%load_ext autoreload
%autoreload 2
from pprint import pprint

In [2]:
import pandas as pd
import numpy as np
import re
import collections
import itertools
import functools

import bokeh as bk
import bokeh.io
import bokeh.models
import bokeh.layouts
import bokeh.plotting
bokeh.io.output_notebook()

# import holoviews as hv
# hv.extension('bokeh')

In [3]:
# Path hack to allow imports from the parent directory.
import sys, os
sys.path.insert(0, os.path.abspath('../../'))

In [4]:
from isadream.isadream import modelUtils
from isadream.isadream import io

---

## Dataflow

The data is transfered from the Drupal server as a `.json` file. Those files placed into a directory as the user requests them. That is, all the datasets that a user selects for any given visualziation are placed in a directory. These files are condensed into four dataframes per `.json` file.

In [5]:
# A demo json file is provided.
nmr_json_demo = modelUtils.SIPOS_DEMO
demo_base_path = modelUtils.BASE_PATH
print(nmr_json_demo, '\n', demo_base_path)

/home/tylerbiggs/git/isadream/isadream/demo_data/demo_json/sipos_2006_talanta_nmr_figs.json 
 /home/tylerbiggs/git/isadream/isadream/demo_data


---

In [23]:
demo_json = io.read_idream_json(nmr_json_demo)
node = io.parse_json(demo_json)

test_groups = (
    ('27 Al ppm', 'ppm', ("Al",)),
    ('Total Aluminate Concentration', 'Molar', ("Al", )),
    ('Counter Ion Concentration', 'Molar', ("Na+", "Li+", "Cs+", "K+", )),
    ('Counter Ion', 'Species', ("Na+", "Li+", "Cs+", "K+", )),
    ('Base Concentration', 'Molar', ("OH-", )),
)

node_assays = [a.build_column_data_dicts(test_groups) for a in node.assays]
dfs = [pd.DataFrame(cds) for cds, _ in node_assays]
df = pd.concat(dfs)
df

Unnamed: 0,parent_node,assay_node,sample_node,27 Al ppm,Total Aluminate Concentration,Counter Ion Concentration,Counter Ion,Base Concentration
0,7bc0a784-2f8f-44ba-9041-a7e0d9812f89,e1988d5d-1cc3-4134-baa4-d164caf86b79,e1025e99-bafc-43f9-898a-bab9e40045fa,79.9,0.005,2.93,K+,2.93
1,7bc0a784-2f8f-44ba-9041-a7e0d9812f89,e1988d5d-1cc3-4134-baa4-d164caf86b79,e1025e99-bafc-43f9-898a-bab9e40045fa,79.84,0.005,4.92,K+,4.92
2,7bc0a784-2f8f-44ba-9041-a7e0d9812f89,e1988d5d-1cc3-4134-baa4-d164caf86b79,e1025e99-bafc-43f9-898a-bab9e40045fa,79.72,0.005,6.85,K+,6.85
3,7bc0a784-2f8f-44ba-9041-a7e0d9812f89,e1988d5d-1cc3-4134-baa4-d164caf86b79,e1025e99-bafc-43f9-898a-bab9e40045fa,79.66,0.005,9.13,K+,9.13
4,7bc0a784-2f8f-44ba-9041-a7e0d9812f89,e1988d5d-1cc3-4134-baa4-d164caf86b79,e1025e99-bafc-43f9-898a-bab9e40045fa,79.66,0.005,10.71,K+,10.71
0,9d992d88-f405-4481-94df-b67991dabf6b,c18c233e-0f0b-45d6-99a4-b578c9a89404,b8a342db-4207-41c9-b2ab-491336912c22,79.92,0.005,0.66,Li+,0.66
1,9d992d88-f405-4481-94df-b67991dabf6b,c18c233e-0f0b-45d6-99a4-b578c9a89404,b8a342db-4207-41c9-b2ab-491336912c22,79.84,0.005,1.1,Li+,1.1
2,9d992d88-f405-4481-94df-b67991dabf6b,c18c233e-0f0b-45d6-99a4-b578c9a89404,b8a342db-4207-41c9-b2ab-491336912c22,79.66,0.005,1.64,Li+,1.64
3,9d992d88-f405-4481-94df-b67991dabf6b,c18c233e-0f0b-45d6-99a4-b578c9a89404,b8a342db-4207-41c9-b2ab-491336912c22,79.54,0.005,2.14,Li+,2.14
4,9d992d88-f405-4481-94df-b67991dabf6b,c18c233e-0f0b-45d6-99a4-b578c9a89404,b8a342db-4207-41c9-b2ab-491336912c22,79.36,0.005,2.59,Li+,2.59


In [24]:
metadata = [md for _, md in node_assays]
metadata
md = collections.ChainMap(*metadata)
# dict(metadata)

In [25]:
md.keys()

KeysView(ChainMap(defaultdict(<class 'tuple'>, {'7bc0a784-2f8f-44ba-9041-a7e0d9812f89': ({'$id': 'https://lampdev02.pnl.gov/bigg006/idreamdrupal/', 'title': 'Sipos 2006, Talanta NMR Figures', 'filename': 'sipos_2006_talanta_nmr_figs.json', 'description': 'Extracted figures.', 'submissionDate': '2018-5-25', 'publicReleaseDate': '2006-03-13', 'experimentSubType': 'Al_NMR'}, [None]), 'e1988d5d-1cc3-4134-baa4-d164caf86b79': (NodeInfo(info={'dataFile': 'sipos_2006_talanta_fig_3_KOH.csv'}, name='NodeInfo00075'), [None]), '2a7e4c6b-864d-411a-8cff-a6b8e465e440': {'sampleName': 'Aluminate Solution'}, '18c201f8-cfd5-4d67-b82e-4d53d24dc1d8': {'sampleName': 'Aluminate Solution'}, '977d884f-a11b-4bb2-b4ef-3b25f966cc40': {'sampleName': 'Potassium Hydroxide'}, '728e2f7c-e1b0-4a80-b189-ec24c63a49bf': {'sampleName': 'Potassium Hydroxide'}, 'e1025e99-bafc-43f9-898a-bab9e40045fa': {'sampleName': 'Potassium Hydroxide'}}), defaultdict(<class 'tuple'>, {'9d992d88-f405-4481-94df-b67991dabf6b': ({'$id': 'http

---

In [None]:
def matching_groups(items, label, unit, species):
    return [((label, unit, species), item)
            for item in items
            if modelUtils.query_factor(item, unit)
            or modelUtils.query_species(item, species)]


def process_group(group, factors, samples):
    
    factor_matches = matching_groups(factors, *group)
    
    sample_matches = matching_groups(sampless, *group)
    
    for sample_group, curr_sample in sample_matches:
        
        sample_key = str(uuid.uuid4())
        
        if sample_unit == "Species":
            add_species_col(curr_sample, )
            
        else:
            sample_factors = matching_groups(curr_sample.factors, *sample_group)
            
        sample_factor_matches = matching_groups(self.factors, *group)
        
        assay_sample_matches = matching_groups(self.factors, *group)
        
        for 
    
    pass



def process_sample():
    pass


def process_factor():
    pass


def add_csv_data_col():
    data = self.datafile_dict.get(str(factor.csv_column_index))
    col_data_source[group_label] = data
    col_data_source['sample_node'] = [sample_key for _ in range(self.factor_size)]
    metadata_dictionary[sample_key] = assay_sample.info

    
def add_species_col(sample, species_query):
    matching_species = list(sample.unique_species & set(species_query))[0]
    data = [matching_species 
            for _ in range(self.factor_size)]
    metadata_dictionary[sample_key] = parental_sample.info
    col_data_source['sample_node'] = [sample_key 
                                      for _ in range(self.factor_size)]
    col_data_source[sample_label] = data
    

def add_factor_data():
    data = [factor.value for _ in range(self.factor_size)]
    col_data_source[sample_label] = data
    col_data_source['sample_node'] = [sample_key for _ in range(self.factor_size)]
    metadata_dictionary[sample_key] = assay_sample.info