In [1]:
%matplotlib inline
import numpy as np
import pandas as pd

---
## Load Dataset
---

In [2]:
# Load the catalysis table: the first CSV column becomes the row index and
# the first line of the file supplies the column names.
df_catalysis_dataset = pd.read_csv("../data/OCM-data.csv", index_col=0, header=0)
# Preview a random subset of rows. The fixed seed keeps the displayed sample
# identical across "Restart & Run All" executions.
df_catalysis_dataset.sample(20, random_state=0)

Unnamed: 0_level_0,M1,M1_atom_number,M2,M2_atom_number,M3,M3_atom_number,Support,Support_ID,M2_mol,M3_mol,...,C2y,C2H6y,C2H4y,COy,CO2y,C2s,C2H6s,C2H4s,COs,CO2s
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Mn-Na2WO4/ZrO2,Mn,25,Na,11,W,74,ZrO2,13,0.37,0.185,...,7.36,1.29,6.07,14.4,3.7,30.33,5.32,25.01,59.33,15.25
Mn-Na2WO4/SiCnf,Mn,25,Na,11,W,74,SiCnf,10,0.37,0.185,...,8.01,1.37,6.64,25.54,6.49,23.6,4.04,19.56,75.25,19.12
Mn-Na2WO4/Nb2O5,Mn,25,Na,11,W,74,Nb2O5,8,0.37,0.185,...,8.25,1.39,6.86,15.06,3.17,34.65,5.84,28.81,63.25,13.31
CeO2,n.a.,0,n.a.,0,n.a.,0,CeO2,5,0.0,0.0,...,7.07,1.73,5.34,3.14,10.25,46.76,11.44,35.32,20.77,67.79
Mn-WOx/SiO2,Mn,25,n.a.,0,W,74,SiO2,11,0.0,0.185,...,3.53,1.89,1.64,1.74,3.49,35.19,18.84,16.35,17.35,34.8
Mn-Na2WO4/BN,Mn,25,Na,11,W,74,BN,4,0.37,0.185,...,4.61,0.62,3.99,24.26,2.76,15.16,2.04,13.13,79.8,9.08
Mn-MoOx/SiO2,Mn,25,n.a.,0,Mo,42,SiO2,11,0.0,0.185,...,1.64,1.03,0.61,5.3,3.89,15.5,9.74,5.77,50.09,36.77
Fe-Na2WO4/SiO2,Fe,26,Na,11,W,74,SiO2,11,0.37,0.185,...,4.87,2.53,2.34,1.88,1.51,74.58,38.74,35.83,28.79,23.12
Blank,n.a.,0,n.a.,0,n.a.,0,n.a.,7,0.0,0.0,...,1.48,0.75,0.73,12.58,1.2,7.76,3.93,3.83,66.0,6.3
Mn-Na2MoO4/SiO2,Mn,25,Na,11,Mo,42,SiO2,11,0.37,0.185,...,5.11,2.83,2.28,1.55,4.42,55.73,30.86,24.86,16.9,48.2


---
## Check dtypes
---

In [3]:
# List the dtype pandas inferred for every column.
df_catalysis_dataset.dtypes

Name                  object
M1                    object
M1_atom_number         int64
M2                    object
M2_atom_number         int64
M3                    object
M3_atom_number         int64
Support               object
Support_ID             int64
M2_mol               float64
M3_mol               float64
M1_mol_percentage      int64
M2_mol_percentage      int64
M3_mol_percentage      int64
Temp                   int64
Total_flow             int64
Ar_flow              float64
CH4_flow             float64
O2_flow              float64
CT                   float64
CH4/O2                 int64
CH4_conv             float64
C2y                  float64
C2H6y                float64
C2H4y                float64
COy                  float64
CO2y                 float64
C2s                  float64
C2H6s                float64
C2H4s                float64
COs                  float64
CO2s                 float64
dtype: object

In [None]:
# Inspect the values stored in the M1_mol_percentage column.
df_catalysis_dataset['M1_mol_percentage']

---
## Check basic statistics for numeric columns
---

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df_catalysis_dataset.describe()

---
## Check if there are any NaN
---

In [None]:
# Number of missing (NaN) entries in each column.
df_catalysis_dataset.isna().sum()

---


## Requirements


* It would be nice to look at distributions in the data (histograms, scatter plots, principal components, etc.).


* Filters could include CH4 conversion, C2y, temperature, pressure, CH4/O2 ratio, M1/M2/M3, CO + CO2 selectivity, etc.


---

---
## Generate unique values of filters to set up sliders

* Sliders for CH4_conv and C2y

* Dropdown for Temp and CH4/O2

---

In [None]:
# Distinct CH4_conv values in ascending order (reference for the slider range).
df_catalysis_dataset['CH4_conv'].sort_values().unique()

In [None]:
# Distinct C2y values in ascending order (reference for the slider range).
df_catalysis_dataset['C2y'].sort_values().unique()

In [None]:
# Distinct Temp values in ascending order.
df_catalysis_dataset['Temp'].sort_values().unique()

In [None]:
# Distinct CH4/O2 ratios, ordered numerically *before* being converted to
# strings so that they can serve as dropdown option labels.
unique_ch4_to_o2 = df_catalysis_dataset['CH4/O2'].sort_values().astype(str).unique()
# Identity mapping label -> label, kept in the order established above.
sorted_unique_ch4_to_o2 = {label: label for label in unique_ch4_to_o2}
print(sorted_unique_ch4_to_o2)

---
## Set up Bokeh Plot


* Based on the Bokeh gallery [movies example](https://github.com/bokeh/bokeh/tree/branch-2.4/examples/app/movies)


* It provides filter widgets and lets the user choose which variables go on the x and y axes


---

In [None]:
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Div, Select, Slider, TextInput
from bokeh.plotting import figure

In [None]:
# Dropdown label -> dataset column name for the quantities offered on the X axis.
axis_map_x = dict(
    Ethane_y="C2H6y",
    Ethylene_y="C2H4y",
    CarbonDiOxide_y="CO2y",
    CarbonMonoOxide_y="COy",
    DiCarbon_s="C2s",
    Ethane_s="C2H6s",
    Ethylene_s="C2H4s",
    CarbonDiOxide_s="CO2s",
    CarbonMonoOxide_s="COs",
)

In [None]:
# Dropdown label -> dataset column name for the quantities offered on the Y axis.
axis_map_y = dict(
    Ethane_y="C2H6y",
    Ethylene_y="C2H4y",
    CarbonDiOxide_y="CO2y",
    CarbonMonoOxide_y="COy",
    DiCarbon_s="C2s",
    Ethane_s="C2H6s",
    Ethylene_s="C2H4s",
    CarbonDiOxide_s="CO2s",
    CarbonMonoOxide_s="COs",
)

In [None]:
# Create input controls.
# NOTE(review): the numeric slider ranges are hard-coded; presumably they were
# read off the unique-value cells earlier in the notebook — re-check them if the
# dataset changes.
slider_methane_conversion = Slider(title="Minimum Methane conversion value",
                                   value=20, start=1, end=46, step=1)
slider_C2y = Slider(title="Minimum value of C2y", start=0.1, end=22.1, value=4.0, step=0.1)
# select_data() keeps only rows whose Temp *equals* this slider's value, so the
# label must not promise a "minimum" threshold.
slider_temp = Slider(title="Temperature", start=700.0, end=900.0, value=800.0, step=50.0)
# The mapping was built from numerically sorted ratios, so its insertion order
# is already the right one. Calling sorted() on the string keys would order
# them lexicographically instead (e.g. "10" before "2").
select_ch4_to_o2 = Select(title="CH4 to O2", options=list(sorted_unique_ch4_to_o2), value="6")
select_x_axis = Select(title="X Axis", options=sorted(axis_map_x.keys()), value="Ethane_y")
select_y_axis = Select(title="Y Axis", options=sorted(axis_map_y.keys()), value="CarbonDiOxide_y")

In [None]:
# Hover tooltip rows: one "<metal> Percent" label per metal slot, each bound to
# the matching "@<metal>_mol_percent" field of the plot's data source.
TOOLTIPS = [
    (f"{metal} Percent", f"@{metal}_mol_percent")
    for metal in ("M1", "M2", "M3")
]

In [None]:
# Empty data source backing the plotted glyph; update() replaces its contents.
# Besides the x/y coordinates it holds the fields referenced by the tooltips.
source = ColumnDataSource(
    data={
        "x": [],
        "y": [],
        "M1_mol_percent": [],
        "M2_mol_percent": [],
        "M3_mol_percent": [],
    }
)

In [None]:
# Figure hosting the scatter plot: the toolbar is hidden and hovering a point
# shows the fields listed in TOOLTIPS.
p = figure(height=600, width=700, title="", toolbar_location=None, tooltips=TOOLTIPS, sizing_mode="scale_both")
# Use scatter() with an explicit circle marker: calling circle() with a
# screen-unit `size` is deprecated as of Bokeh 3.4, while scatter() accepts it
# on both older and newer releases and renders the same markers.
p.scatter(x="x", y="y", source=source, marker="circle", size=7,
          color='mediumblue', line_color=None, fill_alpha=0.6)

In [None]:
def select_data():
    """Return the rows of ``df_catalysis_dataset`` matching the current widget values.

    A row is kept when all of the following hold:

    * ``CH4_conv`` is at least the methane-conversion slider value,
    * ``C2y`` is at least the C2y slider value,
    * ``Temp`` equals the temperature slider value,
    * ``CH4/O2`` equals the ratio chosen in the dropdown.

    Returns:
        pandas.DataFrame: the matching subset of rows (possibly empty).
    """
    frame = df_catalysis_dataset
    # A Select widget reports its value as a string, whereas the CH4/O2 column
    # holds integers; comparing the two directly never matches and would leave
    # the plot permanently empty. Compare against the column's string form,
    # which is the same form the dropdown options were generated from.
    ratio_labels = frame['CH4/O2'].astype(str)
    keep = (
        (frame['CH4_conv'] >= slider_methane_conversion.value)
        & (frame['C2y'] >= slider_C2y.value)
        & (frame['Temp'] == slider_temp.value)
        & (ratio_labels == select_ch4_to_o2.value)
    )
    return frame[keep]

In [None]:
def update():
    """Refresh the plot from the current widget state.

    Re-filters the dataset through ``select_data()``, labels both axes with the
    entries chosen in the axis dropdowns, sets the plot title to the number of
    selected rows, and replaces the contents of ``source`` with the selected
    x/y columns plus the three mol-percentage columns used by the tooltips.
    """
    df = select_data()
    x_name = axis_map_x[select_x_axis.value]
    y_name = axis_map_y[select_y_axis.value]

    p.xaxis.axis_label = select_x_axis.value
    p.yaxis.axis_label = select_y_axis.value
    # Report how many rows passed the filters instead of showing a placeholder
    # title; this also makes an empty selection immediately visible.
    p.title.text = f"{len(df)} data points selected"
    source.data = dict(
        x=df[x_name],
        y=df[y_name],
        M1_mol_percent=df['M1_mol_percentage'],
        M2_mol_percent=df['M2_mol_percentage'],
        M3_mol_percent=df['M3_mol_percentage'],
    )

In [None]:
# All widgets whose value feeds the plot; any change triggers a full refresh.
controls = [
    slider_methane_conversion,
    slider_C2y,
    slider_temp,
    select_ch4_to_o2,
    select_x_axis,
    select_y_axis,
]


def _refresh_on_change(attr, old, new):
    """Bokeh change callback: ignore the event details and redraw the plot."""
    update()


for control in controls:
    control.on_change('value', _refresh_on_change)

In [None]:
# Stack all widgets in a single column with a width of 320.
inputs = column(*controls, width=320)

In [None]:
# Quick check of the label -> column lookup; the result is only displayed, not stored.
axis_map_y['CarbonDiOxide_y']

In [None]:
# Page layout: the widget column and the plot side by side in one row.
l = column(row(inputs, p), sizing_mode="scale_both")

# Populate the data source once so the plot has data before any widget is touched.
update()

# Register the layout with the current Bokeh document and set the page title.
document = curdoc()
document.add_root(l)
document.title = "Catalysis Data"