In [13]:
import pandas as pd
import numpy as np
import json
import os
import bokeh
from bokeh.io import output_notebook, output_file, save
from bokeh.palettes import Colorblind
from bokeh.models import HoverTool, ColumnDataSource
import bokeh.plotting as bk

In [2]:
# Send Bokeh output to the notebook so plots render inline below the cell.
output_notebook()
# NOTE(review): disabled alternative kept from the original. Bokeh's
# output_file targets an HTML document, so the '.png' file name looks like a
# mistake -- confirm before re-enabling.
# output_file('alNaoh.png')

In [3]:
def create_dataframes(json_metadata_path):
    """Build one combined DataFrame from every CSV listed in the metadata file.

    The metadata JSON (constructed by ISASetup.py) is read with this layout:
    ``studies[i]['publications'][j]['doi']`` and
    ``studies[i]['assays'][k]['dataFiles'][m]['name']``.

    Parameters
    ----------
    json_metadata_path : str or path-like
        Path to the metadata JSON file.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of all referenced CSV files with a fresh
        0..n-1 index and an extra ``doi`` column. The column holds the DOI of
        the study the file belongs to: the last publication's DOI when a
        study lists several, ``None`` when it lists none.

    Raises
    ------
    KeyError
        If an expected key is missing from the metadata.
    ValueError
        Propagated from ``pd.concat`` when no data file is referenced at all.
    """
    # Read the metadata json file constructed by ISASetup.py
    with open(json_metadata_path, 'r') as f:
        metadata = json.load(f)

    # One dataframe per csv found, tagged with its study's doi.
    frames = []
    for study in metadata['studies']:
        # Reset for every study: without this, a study that has no
        # publications would silently inherit the previous study's DOI (or
        # raise an unbound-name error if it were the first study).
        doi = None
        for publication in study['publications']:
            # Later publications overwrite earlier ones (last one wins).
            doi = publication['doi']
        for assay in study['assays']:
            for data_file in assay['dataFiles']:
                # The name is used exactly as written in the metadata, so a
                # relative name resolves against the current working directory.
                df = pd.read_csv(data_file['name'])
                df['doi'] = doi
                frames.append(df)
    return pd.concat(frames, ignore_index=True)

In [4]:
# Load every CSV referenced by the metadata file into one combined frame,
# then show the resulting column labels.
data = create_dataframes('data/nmr_metadata.json')
data.keys()

Index(['Al_concentration', 'Al_ppm', 'CI_concentration', 'OH_concentration',
       'counter_ion', 'doi', 'temperature', 'wavelength'],
      dtype='object')

In [5]:
# Show each column's dtype (numeric vs. object) for the combined frame.
data.dtypes

Al_concentration    float64
Al_ppm              float64
CI_concentration    float64
OH_concentration    float64
counter_ion          object
doi                  object
temperature           int64
wavelength          float64
dtype: object

In [6]:
# Integer-encode the counter ions: returns (codes per row, distinct labels),
# which shows which ions occur and how the rows are grouped.
data['counter_ion'].factorize()

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 Index(['Na+', 'Cs+', 'Li+', 'K+'], dtype='object'))

In [7]:
# NOTE(review): repeats the earlier dtype inspection; no cell in between
# modifies `data`, so the output is identical -- candidate for removal.
data.dtypes

Al_concentration    float64
Al_ppm              float64
CI_concentration    float64
OH_concentration    float64
counter_ion          object
doi                  object
temperature           int64
wavelength          float64
dtype: object

In [8]:
# Column names in alphabetical order.
columns = sorted(data.columns)
# Object-dtype columns are treated as discrete; every other column as continuous.
discrete = list(filter(lambda col: data[col].dtype == object, columns))
continuous = list(filter(lambda col: col not in discrete, columns))
# Continuous columns with more than 20 distinct values.
quantileable = list(filter(lambda col: len(data[col].unique()) > 20, continuous))

In [9]:
# Marker sizes from 6 up to 21 in steps of 3.
SIZES = [6, 9, 12, 15, 18, 21]
# Four-colour entry of bokeh's Colorblind palette; displayed as the cell output.
COLORS = Colorblind[4]
COLORS

['#0072B2', '#E69F00', '#F0E442', '#009E73']

In [10]:
# NOTE(review): same expression as an earlier cell and `data` has not been
# modified since, so the output is identical -- candidate for removal.
data['counter_ion'].factorize()

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
 Index(['Na+', 'Cs+', 'Li+', 'K+'], dtype='object'))

In [34]:
# Fixed colour for each counter ion (hex codes, one per ion).
colormap = dict([
    ('Na+', '#0072B2'),
    ('Li+', '#E69F00'),
    ('K+', '#F0E442'),
    ('Cs+', '#009E73'),
])
# Per-row colour column; an ion missing from `colormap` raises KeyError.
data['ion_colors'] = list(map(colormap.__getitem__, data['counter_ion']))

In [35]:
# Wrap the frame (including the 'ion_colors' column) in a ColumnDataSource so
# glyphs and hover tooltips can refer to columns by name.
source = ColumnDataSource(data)
source

In [37]:

# Tooltip rows: the cursor position and the 'doi' column of the hovered point.
hover = HoverTool(
    tooltips=[
        ("(x,y)", "($x, $y)"),
        ("doi", "@{doi}"),
    ],
)

# Axis labels and background styling are supplied directly to the constructor.
fig = bk.figure(
    title='Al ppm v. [MOH]',
    tools=[hover],
    x_axis_label='[MOH]',
    y_axis_label='27 Al ppm',
    background_fill_color="beige",
    background_fill_alpha=0.5,
)

# One circle per row of `source`, coloured by the per-row 'ion_colors' column.
fig.circle(
    x='OH_concentration',
    y='Al_ppm',
    source=source,
    color='ion_colors',
)

bk.show(fig)
# save(fig, 'generated_images/Al_ppm_v_NaOH.html')