# Data Classes

**Overview**

There are five top-level groups in the `.json` file that contain information about a given *Node*. These are read into, and prepared by, the `Model` class. The top-level groups are:

+ nodeInformation
+ studyFactors
+ studySamples
+ assays
+ comments

*Setting up auto-reloading of the isadream package.*

---

In [1]:
# Path hack: resolve the directory two levels above the current working
# directory and put it at the front of the module search path so that the
# imports in the following cells can be found.
import sys
import os
_parent_dir = os.path.abspath('../../')
sys.path.insert(0, _parent_dir)

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from isadream.isadream.models import atomic, compound, utils
from isadream.isadream import io

In [4]:
# Read the demo input referenced by utils.SIPOS_DEMO in a single step, so that
# `demo_json` only ever holds the value returned by read_idream_json.
demo_json = io.read_idream_json(utils.SIPOS_DEMO)
# demo_json

In [5]:
node = io.parse_json(demo_json)

In [6]:
node

<isadream.isadream.models.compound.DrupalNode at 0x7f04d332b080>

In [7]:
print(node.nodeinfo)

[<isadream.isadream.models.atomic.NodeInfo object at 0x7f04fefc0d30>, <isadream.isadream.models.atomic.NodeInfo object at 0x7f04d3316f60>, <isadream.isadream.models.atomic.NodeInfo object at 0x7f04d3316fd0>, <isadream.isadream.models.atomic.NodeInfo object at 0x7f04d3316f98>, <isadream.isadream.models.atomic.NodeInfo object at 0x7f04fc6a5ba8>, <isadream.isadream.models.atomic.NodeInfo object at 0x7f04d332b0f0>, <isadream.isadream.models.atomic.NodeInfo object at 0x7f04d332b0b8>]


In [8]:
node.factors

[<isadream.isadream.models.atomic.Factor at 0x7f04d332b898>,
 <isadream.isadream.models.atomic.Factor at 0x7f04d332b8d0>,
 <isadream.isadream.models.atomic.Factor at 0x7f04d332b908>,
 <isadream.isadream.models.atomic.Factor at 0x7f04d332b940>]

In [9]:
node.comments

[<isadream.isadream.models.atomic.Comment at 0x7f04d3332080>]

In [10]:
for factor in node.factors:
    print(factor)

Factor Type:   Measurement Condition
Float Value:   25.0
String Value:  None
Ref Value:     None
Unit:          Celsius
CSV Index:     None

Factor Type:   Measurement
Float Value:   None
String Value:  None
Ref Value:     None
Unit:          ppm
CSV Index:     1.0

Factor Type:   Measurement Condition
Float Value:   78.204
String Value:  None
Ref Value:     None
Unit:          MHz
CSV Index:     None

Factor Type:   Measurement Reference
Float Value:   None
String Value:  None
Ref Value:     [KAl(SO4)2]
Unit:          Reference Compound
CSV Index:     None



In [11]:
# Attempt to print every assay attached to the node.
# NOTE(review): the recorded output of this cell is
# "TypeError: 'SourceNode' object is not iterable". Presumably `node.assays`
# (or something reached while printing an assay) is a single SourceNode rather
# than an iterable -- confirm against the model classes and fix before relying
# on this cell in a Restart & Run All.
for assay in node.assays:
    print(assay)
#     for factor in assay.factors:
#         print(factor)

TypeError: 'SourceNode' object is not iterable

In [3]:
import isadream.isadream.model as IdreamModel
from isadream.isadream.model import SIPOS_DEMO
from isadream.isadream.model import normalize_to_dataframe
from isadream.isadream.model import load_csv
import itertools
import json
import pandas as pd

---

## The Metadata Dataframe

The `Assay` is the lowest level of separation in metadata.

In [38]:
# Build a Model from SIPOS_DEMO (both imported from isadream.isadream.model).
MODEL = IdreamModel.Model(SIPOS_DEMO)
# Other attributes that were inspected while exploring:
# MODEL.metadata_frame
# MODEL.assay_metadata
# MODEL.labeled_csv_data
MODEL.csv_metadata[0]  # csv_metadata is a list of dataframes; display the first one.

Unnamed: 0_level_0,Unnamed: 1_level_0,samples,samples,samples,samples,samples
Unnamed: 0_level_1,Unnamed: 1_level_1,AssaySampleFactors,AssaySampleFactors,AssaySampleFactors,name,species
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef,NaN,stoichiometry
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0
sipos_2006_talanta_fig_3_KOH.csv,OH-,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0
sipos_2006_talanta_fig_3_LiOH.csv,Li+,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0
sipos_2006_talanta_fig_3_LiOH.csv,OH-,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0
sipos_2006_talanta_fig_3_NaOH.csv,Na+,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0
sipos_2006_talanta_fig_3_NaOH.csv,OH-,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0


In [40]:
MODEL.csv_metadata[1]  # a list of dataframes.

Unnamed: 0_level_0,Unnamed: 1_level_0,1,1,1
Unnamed: 0_level_1,Unnamed: 1_level_1,StudyFactor,StudyFactor,StudyFactor
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_KOH.csv,OH-,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_LiOH.csv,Li+,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_LiOH.csv,OH-,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_NaOH.csv,Na+,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_NaOH.csv,OH-,1.0,Measurement,ppm


In [44]:
MODEL.labeled_csv_data[1]

Unnamed: 0_level_0,Unnamed: 1_level_0,1,1,1,data
Unnamed: 0_level_1,Unnamed: 1_level_1,StudyFactor,StudyFactor,StudyFactor,Unnamed: 5_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef,Unnamed: 5_level_2
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,1.0,Measurement,ppm,"[79.96, 79.9, 79.84, 79.72, 79.66, 79.66]"
sipos_2006_talanta_fig_3_KOH.csv,OH-,1.0,Measurement,ppm,"[79.96, 79.9, 79.84, 79.72, 79.66, 79.66]"
sipos_2006_talanta_fig_3_LiOH.csv,Li+,1.0,Measurement,ppm,"[79.96, 79.92, 79.84, 79.66, 79.54, 79.36, 79...."
sipos_2006_talanta_fig_3_LiOH.csv,OH-,1.0,Measurement,ppm,"[79.96, 79.92, 79.84, 79.66, 79.54, 79.36, 79...."
sipos_2006_talanta_fig_3_NaOH.csv,Na+,1.0,Measurement,ppm,"[79.82, 79.54, 79.98, 79.13, 78.45, 77.67, 76...."
sipos_2006_talanta_fig_3_NaOH.csv,OH-,1.0,Measurement,ppm,"[79.82, 79.54, 79.98, 79.13, 78.45, 77.67, 76...."


In [6]:
# MODEL.csv_metadata

In [9]:
MODEL.csv_metadata[0]

Unnamed: 0_level_0,Unnamed: 1_level_0,samples,samples,samples,samples,samples
Unnamed: 0_level_1,Unnamed: 1_level_1,AssaySampleFactors,AssaySampleFactors,AssaySampleFactors,name,species
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef,NaN,stoichiometry
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0
sipos_2006_talanta_fig_3_KOH.csv,OH-,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0
sipos_2006_talanta_fig_3_LiOH.csv,Li+,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0
sipos_2006_talanta_fig_3_LiOH.csv,OH-,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0
sipos_2006_talanta_fig_3_NaOH.csv,Na+,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0
sipos_2006_talanta_fig_3_NaOH.csv,OH-,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0


In [10]:
MODEL = IdreamModel.Model(SIPOS_DEMO)

# Prototype of attaching the CSV column data to a metadata frame.
# Only the first frame is processed (note the `break`), as a preview.
for metadata_df in MODEL.csv_metadata:
    # Work on a copy so the frames held by MODEL are not modified in place.
    data_file_df = metadata_df.copy()
    # 2-D array: one row per index entry, holding the csvColumnIndex value(s)
    # selected from the third column level.
    csv_idx_array = data_file_df.loc(axis=1)[:, :, 'csvColumnIndex'].values
    md_idx_array = data_file_df.index.values
    # Create the mapping dictionary: index entry -> flattened values read by
    # load_csv.  The first element of the index entry is passed to load_csv,
    # and `.item()` extracts the single column index as a scalar (it raises if
    # a row holds more than one value) instead of calling int() on an array.
    data_map = {
        md_idx: load_csv(md_idx[0], usecols=[int(csv_idx.item())]).T.values.flatten()
        for md_idx, csv_idx in zip(md_idx_array, csv_idx_array)
    }
    # Seed the new column with the index entries, then swap each one for the
    # data mapped to it.
    data_file_df['data'] = md_idx_array
    data_file_df['data'] = data_file_df['data'].map(data_map)
    display(data_file_df)
    break

Unnamed: 0_level_0,Unnamed: 1_level_0,samples,samples,samples,samples,samples,data
Unnamed: 0_level_1,Unnamed: 1_level_1,AssaySampleFactors,AssaySampleFactors,AssaySampleFactors,name,species,Unnamed: 7_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef,NaN,stoichiometry,Unnamed: 7_level_2
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0,"[0.89, 2.93, 4.92, 6.85, 9.13, 10.71]"
sipos_2006_talanta_fig_3_KOH.csv,OH-,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0,"[0.89, 2.93, 4.92, 6.85, 9.13, 10.71]"
sipos_2006_talanta_fig_3_LiOH.csv,Li+,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0,"[0.14, 0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59..."
sipos_2006_talanta_fig_3_LiOH.csv,OH-,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0,"[0.14, 0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59..."
sipos_2006_talanta_fig_3_NaOH.csv,Na+,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0,"[2.93, 4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 1..."
sipos_2006_talanta_fig_3_NaOH.csv,OH-,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0,"[2.93, 4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 1..."


In [11]:
# Re-create the model and display the first entry of labeled_csv_data; the
# output below shows the same 'data' column as the manual loop in the
# preceding cell.
MODEL = IdreamModel.Model(SIPOS_DEMO)

MODEL.labeled_csv_data[0]

Unnamed: 0_level_0,Unnamed: 1_level_0,samples,samples,samples,samples,samples,data
Unnamed: 0_level_1,Unnamed: 1_level_1,AssaySampleFactors,AssaySampleFactors,AssaySampleFactors,name,species,Unnamed: 7_level_1
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef,NaN,stoichiometry,Unnamed: 7_level_2
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0,"[0.89, 2.93, 4.92, 6.85, 9.13, 10.71]"
sipos_2006_talanta_fig_3_KOH.csv,OH-,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0,"[0.89, 2.93, 4.92, 6.85, 9.13, 10.71]"
sipos_2006_talanta_fig_3_LiOH.csv,Li+,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0,"[0.14, 0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59..."
sipos_2006_talanta_fig_3_LiOH.csv,OH-,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0,"[0.14, 0.66, 1.1, 1.64, 2.14, 2.59, 3.11, 3.59..."
sipos_2006_talanta_fig_3_NaOH.csv,Na+,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0,"[2.93, 4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 1..."
sipos_2006_talanta_fig_3_NaOH.csv,OH-,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0,"[2.93, 4.98, 0.93, 6.97, 8.96, 10.95, 12.99, 1..."


In [12]:
# df["B"] = df["A"].map(equiv)

In [17]:
MODEL.csv_metadata[0].values[:,]

array([[0.0, 'Measurement Condition', 'Molar', 'Potassium Hydroxide',
        1.0],
       [0.0, 'Measurement Condition', 'Molar', 'Potassium Hydroxide',
        1.0],
       [0.0, 'Measurement Condition', 'Molar', 'Lithium Hydroxide', 1.0],
       [0.0, 'Measurement Condition', 'Molar', 'Lithium Hydroxide', 1.0],
       [0.0, 'Measurement Condition', 'Molar', 'Sodium Hydroxide', 1.0],
       [0.0, 'Measurement Condition', 'Molar', 'Sodium Hydroxide', 1.0]],
      dtype=object)

In [19]:
MODEL.csv_metadata[1]

Unnamed: 0_level_0,Unnamed: 1_level_0,1,1,1
Unnamed: 0_level_1,Unnamed: 1_level_1,StudyFactor,StudyFactor,StudyFactor
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_KOH.csv,OH-,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_LiOH.csv,Li+,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_LiOH.csv,OH-,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_NaOH.csv,Na+,1.0,Measurement,ppm
sipos_2006_talanta_fig_3_NaOH.csv,OH-,1.0,Measurement,ppm


In [20]:
MODEL.assay_metadata.loc(axis=1)[:, : ,'csvColumnIndex'].columns.get_level_values(-1)

Index(['csvColumnIndex', 'csvColumnIndex'], dtype='object')

In [21]:
MODEL.assay_metadata.loc(axis=1)[:, : ,'csvColumnIndex'].columns.values

array([('samples', 'AssaySampleFactors', 'csvColumnIndex'),
       ('1', 'StudyFactor', 'csvColumnIndex')], dtype=object)

In [22]:
# Column keys whose third level is 'csvColumnIndex'.
csv_index_columns = MODEL.assay_metadata.loc(axis=1)[:, :, 'csvColumnIndex'].columns
# Drop the trailing level from each key, keeping only the leading levels.
cols = tuple(column_key[:-1] for column_key in csv_index_columns.values)
cols

(('samples', 'AssaySampleFactors'), ('1', 'StudyFactor'))

In [23]:
MODEL.assay_metadata.loc(axis=1)[cols[0], :, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,samples,samples,samples,samples,samples
Unnamed: 0_level_1,Unnamed: 1_level_1,AssaySampleFactors,AssaySampleFactors,AssaySampleFactors,name,species
Unnamed: 0_level_2,Unnamed: 1_level_2,csvColumnIndex,factorType,unitRef,NaN,stoichiometry
dataFile,samples.species.speciesReference,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
sipos_2006_talanta_fig_3_KOH.csv,K+,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0
sipos_2006_talanta_fig_3_KOH.csv,OH-,0.0,Measurement Condition,Molar,Potassium Hydroxide,1.0
sipos_2006_talanta_fig_3_LiOH.csv,Li+,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0
sipos_2006_talanta_fig_3_LiOH.csv,OH-,0.0,Measurement Condition,Molar,Lithium Hydroxide,1.0
sipos_2006_talanta_fig_3_NaOH.csv,Na+,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0
sipos_2006_talanta_fig_3_NaOH.csv,OH-,0.0,Measurement Condition,Molar,Sodium Hydroxide,1.0


In [None]:
MODEL.assay_metadata.xs('csvColumnIndex', axis=1, level=-1, drop_level=True)

In [None]:
# MODEL.assay_metadata.select_dtypes(object)

In [None]:
# MODEL.assay_metadata.select_dtypes(float)

In [None]:
# MODEL.assay_metadata.select_dtypes(int)

In [None]:
# build_key_df(MODEL.assay_metadata)

In [None]:
# MODEL.csv_data

In [None]:
# MODEL.assay_metadata[:][:]['csvColumnIndex']

In [None]:
def create_key_value(in_dataframe=MODEL.assay_df):
    """Build a DataFrame keyed by (non-object column, row) pairs.

    The columns of a copy of ``in_dataframe`` are split into those whose dtype
    is ``object`` and the remaining ones.  For every remaining column and every
    row tuple yielded by ``itertuples()``, an entry keyed by ``(column, row)``
    is recorded, and the collected entries are turned into a DataFrame.

    NOTE(review): each key is mapped to the *entire* column rather than to the
    single value of that row -- confirm that this is intended.

    Args:
        in_dataframe: Frame to process.  The default, ``MODEL.assay_df``, is
            evaluated once, when this function is defined.

    Returns:
        pandas.DataFrame constructed from the ``(column, row)`` mapping.
    """
    frame = in_dataframe.copy()

    # Partition the columns by dtype: object columns versus everything else.
    object_columns = [name for name in frame.columns if frame[name].dtype == object]
    other_columns = [name for name in frame.columns if name not in object_columns]

    keyed_columns = {
        (column, record): frame[column]
        for column in other_columns
        for record in frame.itertuples()
    }

    return pd.DataFrame(keyed_columns)

In [None]:
create_key_value()

In [None]:
create_key_value(MODEL.study_sample_df)

In [None]:
MODEL.study_factor_df

In [None]:
create_key_value(MODEL.study_factor_df)