In [1]:
from pathlib import Path
import sys
import pandas as pd
import numpy as np
#import qgrid
import pickle
import sklearn
from datetime import datetime
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from modAL.models import ActiveLearner

# Put the parent of the current working directory on the import path,
# presumably so the project-local imports that follow can be resolved.
package_path = Path.cwd().parent
# sys.path holds plain strings, so the membership test must use the string
# form; testing the Path object never matches and would append a duplicate
# entry every time this cell is re-run.
if str(package_path) not in sys.path:
    sys.path.append(str(package_path))

from utils.dataset_class import Phase3DataSet, Setting
from custom_pipeline import get_model_pkl, dump_model_pkl

In [2]:
# Raw experiment table read straight from the CSV export.
training_raw = pd.read_csv('../data/raw/0057.perovskitedata_DRPFeatures_2020-07-02.csv')

# Load the pickled dataset object. The context manager closes the file handle
# as soon as unpickling finishes instead of leaving it to garbage collection.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were produced by this project.
with open('phase3_dataset.pkl', 'rb') as dataset_file:
    phase3_training = pickle.load(dataset_file)

## Set up data

In [3]:
# Fetch the 'ALHk' / 'random' dataset twice, with the second positional
# argument set to 0 and then 1, and keep only the entry stored under the key
# 'JMXLWMIFDJCGBV-UHFFFAOYSA-N' (looks like an InChIKey -- confirm).
dataset0, dataset1 = (
    phase3_training.get_dataset('ALHk', idx, 'random')['JMXLWMIFDJCGBV-UHFFFAOYSA-N']
    for idx in (0, 1)
)


## Initial setup: uncomment and run once if no model files exist yet

In [9]:
"""
dt = DecisionTreeClassifier(**{'criterion': 'gini', 'splitter': 'best', 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 3, 'class_weight': {0: 0.2520408163265306, 1: 0.7479591836734694}})
learner = ActiveLearner(estimator=dt, X_training=dataset0['x_t'], y_training=dataset0['y_t'])
dump_model_pkl(learner, 'dt', 0, 0)

dt = DecisionTreeClassifier(**{'criterion': 'gini', 'splitter': 'best', 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 3, 'class_weight': {0: 0.2520408163265306, 1: 0.7479591836734694}})
learner = ActiveLearner(estimator=dt, X_training=dataset1['x_t'], y_training=dataset1['y_t'])
dump_model_pkl(learner, 'dt', 1, 0)


query_indices_set0 = []
query_indices_set1 = []

query_instances_set0 = []
query_instances_set1 = []

pickle.dump(query_indices_set0, Path('./dt/q_idx_set0.pkl').open('wb'))
pickle.dump(query_indices_set1, Path('./dt/q_idx_set1.pkl').open('wb'))

pickle.dump(query_instances_set0, Path('./dt/q_inst_set0.pkl').open('wb'))
pickle.dump(query_instances_set1, Path('./dt/q_inst_set1.pkl').open('wb'))

"""

## Set up model and data

In [23]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards.

    NOTE: ``pickle.load`` can execute arbitrary code from the file; only use
    this on pickles written by this notebook.
    """
    with Path(path).open('rb') as handle:
        return pickle.load(handle)


def _as_flat_positions(indices):
    """Flatten a list of scalar or array-like indices into one 1-D integer array.

    An empty list yields an empty integer array, so it can be passed directly
    to ``np.delete`` and to positional indexing.
    """
    if len(indices) == 0:
        return np.empty(0, dtype=int)
    return np.concatenate([np.atleast_1d(entry) for entry in indices]).astype(int)


# Most recently saved learner for each set, plus its iteration counter.
dt_set0, iteration_set0 = get_model_pkl('dt', 0)
dt_set1, iteration_set1 = get_model_pkl('dt', 1)

# Indices and feature rows of the experiments already queried for each set.
query_indices_set0 = _load_pickle('./dt/q_idx_set0.pkl')
query_indices_set1 = _load_pickle('./dt/q_idx_set1.pkl')

query_instances_set0 = _load_pickle('./dt/q_inst_set0.pkl')
query_instances_set1 = _load_pickle('./dt/q_inst_set1.pkl')

# Both sets start from the same files, so parse each file once and give set 1
# its own independent copy.
x_stateset_set0 = np.genfromtxt(f'./statesets/scaled_stateset_{datetime.today().date()}.csv', delimiter=',')
x_stateset_set1 = x_stateset_set0.copy()

rvol_raw_set0 = pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')
rvol_raw_set1 = rvol_raw_set0.copy()

# Keep only the columns whose name contains 'Reagent'.
ss_reagent_vols_set0 = rvol_raw_set0[[col for col in rvol_raw_set0.columns if 'Reagent' in col]]
ss_reagent_vols_set1 = rvol_raw_set1[[col for col in rvol_raw_set1.columns if 'Reagent' in col]]

# Remove rows that were already queried so they cannot be proposed again.
# The same positions are removed from the reagent-volume frame and from the
# feature matrix so the two stay row-aligned.
# drop() is used without inplace=True: the frames above are derived from
# another frame, and mutating them in place can raise SettingWithCopyWarning.
# NOTE(review): the saved indices are applied as row positions in the freshly
# loaded, full stateset -- confirm they were recorded relative to the full
# stateset rather than to an already-reduced pool.
drop_positions_set0 = _as_flat_positions(query_indices_set0)
ss_reagent_vols_set0 = ss_reagent_vols_set0.drop(ss_reagent_vols_set0.index[drop_positions_set0])
x_stateset_set0 = np.delete(x_stateset_set0, drop_positions_set0, 0)

drop_positions_set1 = _as_flat_positions(query_indices_set1)
ss_reagent_vols_set1 = ss_reagent_vols_set1.drop(ss_reagent_vols_set1.index[drop_positions_set1])
x_stateset_set1 = np.delete(x_stateset_set1, drop_positions_set1, 0)


Loading dt/dt_set0_it1_20201203-134407.pkl : Iteration 1
Loading dt/dt_set1_it1_20201203-134409.pkl : Iteration 1


[array([ 0.41898329,  0.13600087, -1.48552172, 45.4689449 ,  4.01110624,
        -1.25503314,  0.53140101,  0.45277056,  0.19477176,  1.90994895,
         0.42845509,  1.9369121 , -0.66195489, -0.51757147, -0.69207406,
        -0.19023062, -0.0756783 ,  0.16083379, -0.19574007, -2.10688602,
        -1.90816642, -1.99757251, -1.46767537, -0.48342343, -1.63443555,
        -1.64141015, -1.23899532, -2.20166869, -1.79895047, -0.95107561,
        -0.22080768, -0.92436418,  0.45675948, -1.38178365, -1.42735295,
        -0.25048972, -0.63286396, -0.86182278, -1.96785213, -0.63286396,
        -1.98146843, -0.28077616, -0.95901859, -0.99658496,  0.        ,
         0.        ,  1.        ,  0.        ,  1.        ,  1.        ]),
 array([ 4.18983288e-01,  2.79392013e-01, -1.51507371e+00,  4.54689449e+01,
         4.58461749e+00, -1.27308395e+00,  4.61060283e-01,  7.04437879e-01,
         2.67798991e-02,  1.94809366e+00,  5.62072010e-01,  1.98731580e+00,
        -5.89575746e-01, -5.74126472e-01

## DT Set 0 Get reagent volumes for next experiment

In [24]:
# Ask the set-0 learner which row of the remaining pool to run next, then
# print the reagent volumes stored at that row position.
query_index, query_instance = dt_set0.query(x_stateset_set0)
print(ss_reagent_vols_set0.iloc[query_index, :])

   Reagent1 (ul)  Reagent2 (ul)  Reagent3 (ul)  Reagent4 (ul)  Reagent5 (ul)  \
0              0              0            220              0              0   

   Reagent6 (ul)  Reagent7 (ul)  Reagent8 (ul)  Reagent9 (ul)  
0              0            280              0              0  


In [8]:
# Once confirmed above add to list of existing indices.
# query_index is a position inside the *reduced* pool (rows queried earlier
# have already been removed), whereas the setup cell removes the saved indices
# from the freshly loaded, full stateset. Saving the raw position would
# therefore remove the wrong row next session. Store the row's original label
# instead: the frame comes from read_csv with its default 0..n-1 index, so the
# label equals the row's position in the full stateset.
# NOTE(review): query_index / query_instance are shared with the set-1 cells;
# run this right after the set-0 query cell.
query_indices_set0.append(np.atleast_1d(ss_reagent_vols_set0.index[query_index]))
query_instances_set0.append(query_instance)

[ 4.18983288e-01  2.49331447e-01 -1.53436577e+00  4.54689449e+01
  4.46438641e+00 -1.28486784e+00  5.09448104e-01  6.36072818e-01
 -1.05019157e-01  1.92173572e+00  4.58633829e-01  1.95144578e+00
 -6.05892697e-01 -6.12646912e-01 -6.84981303e-01 -1.54520304e-01
 -1.56969617e-02  1.60833790e-01 -1.95740073e-01 -2.10688602e+00
 -1.90816642e+00 -1.99757251e+00 -1.46767537e+00 -4.83423433e-01
 -1.63443555e+00 -1.64141015e+00 -1.23899532e+00 -2.20166869e+00
 -1.79895047e+00 -9.51075614e-01 -2.20807675e-01 -9.24364179e-01
  4.56759484e-01 -1.38178365e+00 -1.42735295e+00 -2.50489716e-01
 -6.32863963e-01 -8.61822782e-01 -1.96785213e+00 -6.32863963e-01
 -1.98146843e+00 -2.80776157e-01 -9.59018585e-01 -9.96584964e-01
  0.00000000e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
  1.00000000e+00  1.00000000e+00]
Reagent1 (ul)      9
Reagent2 (ul)     60
Reagent3 (ul)    151
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)    280
Reagent8 (ul)      0
Reagent9 (ul)     

## DT Set 0 Enter Crystal result
### Note: If crystal score is 4 => result = 1
### If crystal score is 1, 2 or 3 => result = 0

In [7]:
# Binary outcome of the experiment just run: 1 if the crystal score was 4,
# 0 if it was 1, 2 or 3. Edit this value by hand before running the cell.
result = 0
# Guard against typing the raw crystal score (1-4) instead of the binary label.
if result not in (0, 1):
    raise ValueError(f'result must be 0 or 1, got {result!r}')
# Feed the labelled instance back to the set-0 learner.
dt_set0.teach(query_instance, [result])

## DT Set 1 Get reagent volumes for next experiment

In [25]:
# Ask the set-1 learner which row of the remaining pool to run next, then
# report the instance shape, the chosen position and that row's reagent
# volumes, one item per line.
query_index, query_instance = dt_set1.query(x_stateset_set1)
print(query_instance.shape, query_index, ss_reagent_vols_set1.iloc[query_index], sep='\n')

(1, 50)
[8894]
      Reagent1 (ul)  Reagent2 (ul)  Reagent3 (ul)  Reagent4 (ul)  \
8896            350              0             40              0   

      Reagent5 (ul)  Reagent6 (ul)  Reagent7 (ul)  Reagent8 (ul)  \
8896              0              0            110              0   

      Reagent9 (ul)  
8896              0  


In [19]:
# query_index is a position inside the *reduced* pool (rows queried earlier
# have already been removed), whereas the setup cell removes the saved indices
# from the freshly loaded, full stateset. Saving the raw position would
# therefore remove the wrong row next session. Store the row's original label
# instead: the frame comes from read_csv with its default 0..n-1 index, so the
# label equals the row's position in the full stateset.
# NOTE(review): query_index / query_instance are shared with the set-0 cells;
# run this right after the set-1 query cell.
query_indices_set1.append(np.atleast_1d(ss_reagent_vols_set1.index[query_index]))
query_instances_set1.append(query_instance)

(1, 50)

## DT Set 1 Enter Crystal result
### Note: If crystal score is 4 => result = 1
### If crystal score is 1, 2 or 3 => result = 0

In [20]:
# Binary outcome of the experiment just run: 1 if the crystal score was 4,
# 0 if it was 1, 2 or 3. Edit this value by hand before running the cell.
result = 0
# Guard against typing the raw crystal score (1-4) instead of the binary label.
if result not in (0, 1):
    raise ValueError(f'result must be 0 or 1, got {result!r}')
# Feed the labelled instance back to the set-1 learner.
dt_set1.teach(query_instance, [result])

## Finish Iteration Set 0

In [21]:
# Advance the set-0 iteration counter and save the updated learner under it.
# Re-running this cell increments the counter again, so run it once per
# completed experiment.
iteration_set0 += 1
dump_model_pkl(dt_set0, 'dt', 0, iteration_set0)

# Persist the query history. The context managers flush and close each file
# deterministically instead of leaving the handles to garbage collection.
with Path('./dt/q_idx_set0.pkl').open('wb') as idx_file:
    pickle.dump(query_indices_set0, idx_file)
with Path('./dt/q_inst_set0.pkl').open('wb') as inst_file:
    pickle.dump(query_instances_set0, inst_file)

## Finish Iteration Set 1

In [22]:
# Advance the set-1 iteration counter and save the updated learner under it.
# Re-running this cell increments the counter again, so run it once per
# completed experiment.
iteration_set1 += 1
dump_model_pkl(dt_set1, 'dt', 1, iteration_set1)

# Persist the query history. The context managers flush and close each file
# deterministically instead of leaving the handles to garbage collection.
with Path('./dt/q_idx_set1.pkl').open('wb') as idx_file:
    pickle.dump(query_indices_set1, idx_file)
with Path('./dt/q_inst_set1.pkl').open('wb') as inst_file:
    pickle.dump(query_instances_set1, inst_file)

In [11]:
# Scratch cell: re-read today's scaled stateset into x_stateset_set0, then
# display the reagent volumes at row position 17833 of the set-1 frame.
# NOTE(review): this replaces x_stateset_set0 with the full, unfiltered array,
# so it no longer lines up row-for-row with a ss_reagent_vols_set0 that has had
# queried rows dropped -- re-run the setup cell before querying set 0 again.
x_stateset_set0 = np.genfromtxt(
    fname=f'./statesets/scaled_stateset_{datetime.today().date()}.csv',
    delimiter=',',
)
ss_reagent_vols_set1.iloc[17833, :]

Reagent1 (ul)    420
Reagent2 (ul)      0
Reagent3 (ul)     60
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)     20
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 17833, dtype: int64

In [12]:
# Scratch cell: reload the full reagent-volume table for set 0 (no queried
# rows removed), keep the columns whose name contains 'Reagent', and display
# the row at position 11441.
rvol_raw_set0 = pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')
ss_reagent_vols_set0 = rvol_raw_set0.filter(like='Reagent', axis='columns')
ss_reagent_vols_set0.iloc[11441, :]

Reagent1 (ul)    324
Reagent2 (ul)     30
Reagent3 (ul)     66
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)     80
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 11441, dtype: int64

In [14]:
# Scratch lookup: display the reagent volumes at row position 9275 of the
# set-0 frame.
ss_reagent_vols_set0.iloc[9275, :]


Reagent1 (ul)    122
Reagent2 (ul)     40
Reagent3 (ul)    238
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)    100
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 9275, dtype: int64