# Data Classes

**Overview**

There are five top-level groups in the `.json` file that contain information about a given *Node*. These are read into, and prepared by, the `Model` class. The top-level groups are:

+ nodeInformation
+ studyFactors
+ studySamples
+ assays
+ comments

*Setting up auto-reloading of the isadream package.*

---

# Plan Output of the Class

I want the class to build and return a pandas dataframe upon request. I will have a controller that handles the multiple nodes, and hands them off to the Bokeh visualizations.

```python
# Create the node object from a json path.
demo_node = io.read_dream_json(demo_json)
```

Request the data. We can assume that there is at least some useful information in this node, as it was passed by the Drupal query. This should return a dataframe for every `Assay` object attached to the `DrupalNode`.


```python
# An individual frame will be constructed by a keyword query.
demo_node.build_frame(**query_kwargs)
```

The controller then takes over and combines the dataframes for a Bokeh application.

```python
from controller import controller


# Create an instance of the controller class.
control = controller.Controller()

# Create all the individual frames. Drop those that fail.
frames = control.build_frames(*nodes, **query_kwargs)

# Combine the frames to a single dataframe.
# Convert that single frame to a format that Bokeh can read.
combined_frames = control.combine_frames(frames)
```

In [1]:
# Path hack to allow imports from the parent directory.
import sys, os
from pprint import pprint
sys.path.insert(0, os.path.abspath('../../'))

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import pandas as pd

In [72]:
from isadream.isadream.models import utils, elemental, containers
from isadream.isadream import io

In [100]:
demo_json = utils.SIPOS_DEMO
demo_json = io.read_idream_json(demo_json)
node = io.parse_json(demo_json)

In [101]:
# node

In [102]:
node

<isadream.isadream.models.nodal.DrupalNode at 0x7fe119ac4f28>

In [103]:
node.info

{'$id': 'https://lampdev02.pnl.gov/bigg006/idreamdrupal/', 'title': 'Sipos 2006, Talanta NMR Figures', 'filename': 'sipos_2006_talanta_nmr_figs.json', 'description': 'Extracted figures.', 'submissionDate': '2018-5-25', 'publicReleaseDate': '2006-03-13', 'experimentSubType': 'Al_NMR'}

In [106]:
# Render one table per assay: the assay's `column_data_source` records are
# loaded into a DataFrame and transposed before being displayed.
# The commented-out lines are earlier inspection/normalization experiments.
for assay in node.assays:
#     print(assay.all_species)
#     print('-'*20)
#     print(assay.all_factors)
#     print('-'*20)
#     print(assay.csv_index_factors)
#     pprint(assay.column_data_source)
#     print('-'*20)
    # `.T` swaps rows and columns of the frame built by `from_records`.
    display(pd.DataFrame.from_records(assay.column_data_source).T)
#     break
        
#     pprint([*utils.normalize(assay.as_dict)])
    
    
#     normalized_dict = list(utils.normalize(assay.as_dict))
    # Read the data into a pandas DataFrame.
#     normalized_df = pd.DataFrame.from_records(normalized_dict)
#     return normalized_df
#     print(assay.all_factors)
#     break
# normalized_dict
# normalized_df

Unnamed: 0,0,1,2,3,4
"((Material_Property, Density, g/cm^3), ((Fake, 1.0), (Fake, 2.0)))",1.05,1.05,1.05,1.05,1.05
"((Material_Property, Poor, Quality), ((Fake, 1.0), (Fake, 2.0)))",Poor,Poor,Poor,Poor,Poor
"((Material_Property, Purity_by_Weight, Percent), ((Al(III), 1.0),))",0.98,0.98,0.98,0.98,0.98
"((Measurement_Condition, Molar), ((Al(III), 1.0),))",0.005,0.005,0.005,0.005,0.005
"((Measurement_Condition, Molar), ((Fake, 1.0), (Fake, 2.0)))",0.006,0.006,0.006,0.006,0.006
"((Measurement_Condition, Molar), ((OH-, 1.0), (K+, 1.0)))",2.93,4.92,6.85,9.13,10.71


Unnamed: 0,0,1,2,3,4,5,6,7
"((Material_Property, Density, g/cm^3), ((Fake, 1.0), (Fake, 2.0)))",1.05,1.05,1.05,1.05,1.05,1.05,1.05,1.05
"((Material_Property, Poor, Quality), ((Fake, 1.0), (Fake, 2.0)))",Poor,Poor,Poor,Poor,Poor,Poor,Poor,Poor
"((Material_Property, Purity_by_Weight, Percent), ((Al(III), 1.0),))",0.98,0.98,0.98,0.98,0.98,0.98,0.98,0.98
"((Measurement_Condition, Molar), ((Al(III), 1.0),))",0.005,0.005,0.005,0.005,0.005,0.005,0.005,0.005
"((Measurement_Condition, Molar), ((Fake, 1.0), (Fake, 2.0)))",0.006,0.006,0.006,0.006,0.006,0.006,0.006,0.006
"((Measurement_Condition, Molar), ((OH-, 1.0), (Li+, 1.0)))",0.66,1.1,1.64,2.14,2.59,3.11,3.59,4.11


Unnamed: 0,0,1,2,3,4,5,6,7,8
"((Material_Property, Density, g/cm^3), ((Fake, 1.0), (Fake, 2.0)))",1.05,1.05,1.05,1.05,1.05,1.05,1.05,1.05,1.05
"((Material_Property, Poor, Quality), ((Fake, 1.0), (Fake, 2.0)))",Poor,Poor,Poor,Poor,Poor,Poor,Poor,Poor,Poor
"((Material_Property, Purity_by_Weight, Percent), ((Al(III), 1.0),))",0.98,0.98,0.98,0.98,0.98,0.98,0.98,0.98,0.98
"((Measurement_Condition, Molar), ((Al(III), 1.0),))",0.005,0.005,0.005,0.005,0.005,0.005,0.005,0.005,0.005
"((Measurement_Condition, Molar), ((Fake, 1.0), (Fake, 2.0)))",0.006,0.006,0.006,0.006,0.006,0.006,0.006,0.006,0.006
"((Measurement_Condition, Molar), ((Na+, 1.0), (OH-, 1.0)))",4.98,0.93,6.97,8.96,10.95,12.99,14.92,16.97,18.92


In [71]:
# node.factors

In [101]:
pprint(node.all_samples)

{{'Demo second Solution': {'species': [{'Fake': 2.0}], 'sources': [{'species': [Species Reference:  Fake
Stoichiometry:      1.0], 'factors': [Factor Type:   Material Property
Float Value:   1.05
String Value:  None
Ref Value:     Density
Unit:          g/cm^3
CSV Index:     None
, Factor Type:   Material Property
Float Value:   None
String Value:  None
Ref Value:     Poor
Unit:          Quality
CSV Index:     None
]}], 'factors': [{'Measurement_Condition_Molar': 0.006}]}},
 {None: {'species': [{'K+': 1.0}, {'OH-': 1.0}], 'sources': [{'species': [Species Reference:  None
Stoichiometry:      None], 'factors': []}], 'factors': [None]}},
 {'Lithium Hydroxide': {'species': [{'Li+': 1.0}, {'OH-': 1.0}], 'sources': [{'species': [Species Reference:  None
Stoichiometry:      None], 'factors': []}], 'factors': [None]}},
 {'Sodium Hydroxide': {'species': [{'Na+': 1.0}, {'OH-': 1.0}], 'sources': [{'species': [Species Reference:  None
Stoichiometry:      None], 'factors': []}], 'factors': [None]}}

In [100]:
node.all_species
# for s in set(node.all_species):
#     print(s)

[Species Reference:  Li+
 Stoichiometry:      1.0, Species Reference:  K+
 Stoichiometry:      1.0, Species Reference:  Al(III)
 Stoichiometry:      1.0, Species Reference:  OH-
 Stoichiometry:      1.0, Species Reference:  OH-
 Stoichiometry:      1.0, Species Reference:  OH-
 Stoichiometry:      1.0, Species Reference:  Al(III)
 Stoichiometry:      1.0, Species Reference:  Fake
 Stoichiometry:      2.0, Species Reference:  Na+
 Stoichiometry:      1.0, Species Reference:  Fake
 Stoichiometry:      1.0]

In [90]:
# for f in set(node.all_factors):
#     print(f)

In [14]:
node.comments

[Comment: Study level comment on Sipos 2006.
Body:    I manually pulled this points out with a web tool.]

In [15]:
# node.assays

In [16]:
node.samples.as_dict.maps

[{'Aluminate Solution': {'species': [{'Al(III)': 1.0}],
   'sources': [{'species': [Species Reference:  Al(III)
     Stoichiometry:      1.0], 'factors': [Factor Type:   Material Property
     Float Value:   0.98
     String Value:  None
     Ref Value:     Purity by Weight
     Unit:          Percent
     CSV Index:     None
     ]}],
   'factors': [{'Measurement_Condition_Molar': 0.005}]}},
 {'Demo second Solution': {'species': [{'Fake': 2.0}],
   'sources': [{'species': [Species Reference:  Fake
     Stoichiometry:      1.0], 'factors': [Factor Type:   Material Property
     Float Value:   1.05
     String Value:  None
     Ref Value:     Density
     Unit:          g/cm^3
     CSV Index:     None
     , Factor Type:   Material Property
     Float Value:   None
     String Value:  None
     Ref Value:     Poor
     Unit:          Quality
     CSV Index:     None
     ]}],
   'factors': [{'Measurement_Condition_Molar': 0.006}]}}]

In [17]:
node.factors.as_dict.maps

[{'Measurement_Condition_Celsius': 25.0},
 None,
 {'Measurement_Condition_MHz': 78.204},
 {'Measurement_Reference_[KAl(SO4)2]_Reference_Compound': '[KAl(SO4)2]'}]

In [18]:
node.info

{'$id': 'https://lampdev02.pnl.gov/bigg006/idreamdrupal/', 'title': 'Sipos 2006, Talanta NMR Figures', 'filename': 'sipos_2006_talanta_nmr_figs.json', 'description': 'Extracted figures.', 'submissionDate': '2018-5-25', 'publicReleaseDate': '2006-03-13', 'experimentSubType': 'Al_NMR'}

In [16]:
node.comments.as_dict.maps

[{'Study level comment on Sipos 2006.': 'I manually pulled this points out with a web tool.'}]

In [17]:
node.samples.as_dict.maps

[{'Aluminate Solution': {'species': [{'Al(III)': 1.0}],
   'sources': [{'species': [Species Reference:  Al(III)
     Stoichiometry:      1.0], 'factors': [Factor Type:   Material Property
     Float Value:   0.98
     String Value:  None
     Ref Value:     Purity by Weight
     Unit:          Percent
     CSV Index:     None
     ]}],
   'factors': [{'Measurement_Condition_Molar': 0.005}]}},
 {'Demo second Solution': {'species': [{'Fake': 2.0}],
   'sources': [{'species': [Species Reference:  Fake
     Stoichiometry:      1.0], 'factors': [Factor Type:   Material Property
     Float Value:   1.05
     String Value:  None
     Ref Value:     Density
     Unit:          g/cm^3
     CSV Index:     None
     , Factor Type:   Material Property
     Float Value:   None
     String Value:  None
     Ref Value:     Poor
     Unit:          Quality
     CSV Index:     None
     ]}],
   'factors': [{'Measurement_Condition_Molar': 0.006}]}}]

In [18]:
node.assays.as_dict.maps

[{'sipos_2006_talanta_fig_3_KOH.csv': {'data': {0: [2.93,
     4.92,
     6.85,
     9.13,
     10.71],
    1: [79.9, 79.84, 79.72, 79.66, 79.66]},
   'factors': [{'Measurement_Condition_Test': 1.05},
    {'Measurement_Condition_Celsius': 25.0},
    None,
    {'Measurement_Condition_MHz': 78.204},
    {'Measurement_Reference_[KAl(SO4)2]_Reference_Compound': '[KAl(SO4)2]'}],
   'samples': [{None: {'species': [{'K+': 1.0}, {'OH-': 1.0}],
      'sources': [{'species': [Species Reference:  None
        Stoichiometry:      None], 'factors': []}],
      'factors': [None]}},
    {'Aluminate Solution': {'species': [{'Al(III)': 1.0}],
      'sources': [{'species': [Species Reference:  Al(III)
        Stoichiometry:      1.0], 'factors': [Factor Type:   Material Property
        Float Value:   0.98
        String Value:  None
        Ref Value:     Purity by Weight
        Unit:          Percent
        CSV Index:     None
        ]}],
      'factors': [{'Measurement_Condition_Molar': 0.005}]}},

---

In [145]:
# Inspect the samples of the FIRST assay only (the loop ends with `break`).
# For each sample: print the sample object, then the `as_dict` form of each
# of its species (each followed by a dashed separator), then the `as_dict`
# form of each of its factors.
for assay in node.assays:
#     print(assay.info)
#     print(assay.factors)
#     for f in assay.factors:
# #         print(f)
#         print(f.dict_label)
        
#         print(f.dict_value)
#         print('\n')
        
    for s in assay.samples:
        print(s)
        print('-'*10)
#         print(s.all_species)
        for sp in s.all_species:
            print(sp.as_dict)
            print('-'*10)
            
            
        for fa in s.all_factors:
            print(fa.as_dict)
#         print(s.all_factors)
#         print(i.dict_label for i in s.all_factors)
#         print(i.dict_value for i in s.all_factors)
        
        
#         break
#     print(assay.comments)
#     print(assay.samples)
#     print(assay.samples)
    # Stop after the first assay.
    break
    

<isadream.isadream.models.nodal.SampleNode object at 0x7fdfa685b2b0>
----------
{'K+': 1.0}
----------
{'OH-': 1.0}
----------
None
----------
None
<isadream.isadream.models.nodal.SampleNode object at 0x7fdfa6869208>
----------
{'Al(III)': 1.0}
----------
{'Al(III)': 1.0}
----------
{'Measurement_Condition_Molar': 0.005}
{'Material_Property_Purity_by_Weight_Percent': 0.98}
<isadream.isadream.models.nodal.SampleNode object at 0x7fdfa6869080>
----------
{'Fake': 2.0}
----------
{'Fake': 1.0}
----------
{'Measurement_Condition_Molar': 0.006}
{'Material_Property_Density_g/cm^3': 1.05}
{'Material_Property_Poor_Quality': 'Poor'}


In [146]:
# Print `assay.info` for every assay, followed by a newline separator.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'AssayNode' object has no attribute 'info'", so the first
# print fails as written -- confirm which AssayNode attribute was intended.
for assay in node.assays:
    print(assay.info)
    assay.samples  # bare attribute access; the value is not used here
#     print('\n')    
#     print(assay.factors)
#     for f in assay.factors:
#         print(f)
#     print('\n') 
#     for f in assay.all_factors:
#         print(f)
#     print(assay.samples)
#     for s in assay.all_samples:
#         print(s)
#     print('\n')    
    print('\n')
#     print(assay.csv_index_factors)
#     print('\n')
#     print(assay._datafile_dict)
#     print('\n')
#     for s in assay.all_species:
#         print(s)
#     break

AttributeError: 'AssayNode' object has no attribute 'info'

## Query Design

By **species** or **factors**.

If a matching `species_reference` or `factor_type` is found, each `Assay` object should return its data formatted by:
+ species
+ queried factors 
    + should be provided as a function of any matching CSV index values.

In [147]:
import csv

In [148]:
utils.SIPOS_DEMO

'/home/tyler/git/isadream/isadream/demo_data/demo_json/sipos_2006_talanta_nmr_figs.json'

In [149]:
from collections import defaultdict
from csv import DictReader

In [150]:
print(parse_csv_by_field(csv_path))

NameError: name 'parse_csv_by_field' is not defined

In [None]:
json_data = io.read_idream_json(utils.SIPOS_DEMO)

In [None]:
[] + None or [1]

In [None]:
test_factor = json_data['studyFactors'][0]
test_factor

In [None]:
t_factor = elemental.Factor(test_factor)
print(t_factor)

In [None]:
t_factor.query(['Celsius'])

---

In [None]:
test_sample = json_data['studySamples'][0]
test_sample

### Test Species Element 

In [None]:
test_species = json_data['studySamples'][0]['species'][0]
t_species = elemental.SpeciesFactor(test_species)
print(t_species)

### Species Property

A list of Species objects.

In [None]:
species_prop_test = containers.Species([test_species])
print(species_prop_test)

### SampleNode

In [None]:
io.parse_source()

In [None]:
# Parse every entry under the 'studySamples' key with `io.parse_sample`,
# keeping the results in their original order.
sample_list = [
    io.parse_sample(sample_dict)
    for sample_dict in json_data['studySamples']
]

In [None]:
sample_list

### Samples Container

In [None]:
containers.Samples(sample_list)

In [None]:
# Private sentinel marking "no argument supplied"; lets an explicit ``None``
# still be passed through unchanged.
_NO_ARG = object()


def tester(arg=_NO_ARG):
    """Return ``arg`` unchanged, or a fresh empty list when it is omitted.

    A literal ``[]`` default is created once, when the function is defined,
    and is then shared by every call; mutating the returned list would leak
    into later calls. A new list is built per call instead.

    Args:
        arg: Any object. When omitted, a new empty list is used.

    Returns:
        ``arg`` itself when supplied, otherwise a new empty list.
    """
    if arg is _NO_ARG:
        return []
    return arg

In [None]:
tester()

In [None]:
nodal.Sample(Node)

In [None]:
drupalnode = io.parse_json(json_data)
print(drupalnode)

In [None]:
# Walk the parsed node three levels deep and print each level:
# every assay, every item in that assay's `factors`, and every element
# obtained by iterating that item.
for a in drupalnode.assays:
    print(a)
    for f in a.factors:
        print(f)
        # Each `f` is itself iterated here, so it must be iterable.
        for fact in f:
            print(fact)

In [None]:
[] + [1, 2, 3]

In [None]:
json_data

In [None]:
print(node.nodeinfo)

In [None]:
node.factors

In [None]:
node.comments

In [None]:
for factor in node.factors:
    print(factor)

In [None]:
for assay in node.assays:
    print(assay)
#     for factor in assay.factors:
#         print(factor)

In [None]:
import isadream.isadream.model as IdreamModel
from isadream.isadream.model import SIPOS_DEMO
from isadream.isadream.model import normalize_to_dataframe
from isadream.isadream.model import load_csv
import itertools
import json
import pandas as pd

---

## The Metadata Dataframe

The `Assay` is the lowest level of separation in metadata.

In [None]:
MODEL = IdreamModel.Model(SIPOS_DEMO)
# MODEL.metadata_frame
# MODEL.assay_metadata
# MODEL.labeled_csv_data
MODEL.csv_metadata[0]  # a list of dataframes.

In [None]:
MODEL.csv_metadata[1]  # a list of dataframes.

In [None]:
MODEL.labeled_csv_data[1]

In [None]:
# MODEL.csv_metadata

In [None]:
MODEL.csv_metadata[0]

In [None]:
# Rebuild the model from SIPOS_DEMO, then attach the loaded CSV column data
# to the first metadata frame as a new 'data' column and display it.
MODEL = IdreamModel.Model(SIPOS_DEMO)

for data_file_df in MODEL.csv_metadata:
    # Values of the columns whose third column-level label is
    # 'csvColumnIndex' (assumes a three-level column MultiIndex -- TODO confirm).
    csv_idx_array = data_file_df.loc(axis=1)[:,:,'csvColumnIndex'].values
    # Row index labels; each is used below as a dictionary key and indexed
    # with [0] (presumably a tuple whose first item identifies the CSV file
    # -- verify against `load_csv`).
    md_idx_array = data_file_df.index.values
    # Create the mapping dictionary: row label -> flattened values of the
    # single CSV column selected by that row's csvColumnIndex.
    data_map = {md_idx: load_csv(md_idx[0], usecols=[int(csv_idx)]).T.values.flatten()
                for md_idx, csv_idx in zip(md_idx_array, csv_idx_array)}    
    # Seed the new column with the row labels, then swap each label for its
    # loaded data via the mapping.
    data_file_df['data'] = md_idx_array
    data_file_df['data'] = data_file_df['data'].map(data_map)
    display(data_file_df)
    # Only the first frame is processed.
    break

In [None]:
MODEL = IdreamModel.Model(SIPOS_DEMO)

MODEL.labeled_csv_data[0]

In [None]:
# df["B"] = df["A"].map(equiv)

In [None]:
MODEL.csv_metadata[0].values[:,]

In [None]:
MODEL.csv_metadata[1]

In [None]:
MODEL.assay_metadata.loc(axis=1)[:, : ,'csvColumnIndex'].columns.get_level_values(-1)

In [None]:
MODEL.assay_metadata.loc(axis=1)[:, : ,'csvColumnIndex'].columns.values

In [None]:
cols = MODEL.assay_metadata.loc(axis=1)[:, : ,'csvColumnIndex'].columns.values
# cols = [tuple(cols[:-1])]
cols = tuple(c[:-1] for c in cols)
cols

In [None]:
MODEL.assay_metadata.loc(axis=1)[cols[0], :, :]

In [None]:
MODEL.assay_metadata.xs('csvColumnIndex', axis=1, level=-1, drop_level=True)

In [None]:
# MODEL.assay_metadata.select_dtypes(object)

In [None]:
# MODEL.assay_metadata.select_dtypes(float)

In [None]:
# MODEL.assay_metadata.select_dtypes(int)

In [None]:
# build_key_df(MODEL.assay_metadata)

In [None]:
# MODEL.csv_data

In [None]:
# MODEL.assay_metadata[:][:]['csvColumnIndex']

In [None]:
def create_key_value(in_dataframe=MODEL.assay_df):
    """Build a frame keyed by (non-object column, row tuple) pairs.

    The input is copied first. Columns whose dtype is ``object`` are treated
    as discrete and skipped; every remaining column is stored once per row
    under the key ``(column_label, row)``, where ``row`` is the named tuple
    yielded by ``DataFrame.itertuples``.

    Args:
        in_dataframe: The frame to process. The default, ``MODEL.assay_df``,
            is evaluated once, when this function is defined.

    Returns:
        A new ``pandas.DataFrame`` constructed from that mapping.
    """
    frame = in_dataframe.copy()
    labels = frame.columns

    # Object-dtype columns are discrete; everything else is continuous.
    discrete = [label for label in labels if frame[label].dtype == object]
    continuous = [label for label in labels if label not in discrete]

    # One entry per (continuous column, row) pair, each holding the full column.
    keyed_columns = {
        (label, record): frame[label]
        for label in continuous
        for record in frame.itertuples()
    }
    return pd.DataFrame(keyed_columns)

In [None]:
create_key_value()

In [None]:
create_key_value(MODEL.study_sample_df)

In [None]:
MODEL.study_factor_df

In [None]:
create_key_value(MODEL.study_factor_df)