In [4]:
from pathlib import Path
import sys
import pandas as pd
import numpy as np
#import qgrid
import pickle
import sklearn
from datetime import datetime
#from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from modAL.models import ActiveLearner

# Put the parent of the current working directory on the import path
# (presumably so the project-local imports that follow can be resolved --
# confirm against the repository layout).
package_path = Path.cwd().parent
# sys.path holds plain strings, so the membership test must use the string
# form; testing the Path object itself never matches and would append a
# duplicate entry every time this cell is re-run.
if str(package_path) not in sys.path:
    sys.path.append(str(package_path))

from utils.dataset_class import Phase3DataSet, Setting
from custom_pipeline import get_model_pkl, dump_model_pkl

In [5]:
# Raw feature table read straight from the CSV export.
training_raw = pd.read_csv('../data/raw/0057.perovskitedata_DRPFeatures_2020-07-02.csv')

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'phase3_dataset.pkl' if it comes from a trusted source.
# The context manager closes the handle that a bare open() call would leak.
with open('phase3_dataset.pkl', 'rb') as dataset_file:
    phase3_training = pickle.load(dataset_file)

## Set up data

In [6]:
# Fetch the 'ALHk' / 'random' dataset for second-argument values 0 and 1
# (presumably the set number -- confirm in Phase3DataSet.get_dataset) and keep
# only the entry stored under the InChIKey-formatted key below.
INCHI_KEY = 'JMXLWMIFDJCGBV-UHFFFAOYSA-N'
dataset0, dataset1 = (
    phase3_training.get_dataset('ALHk', set_id, 'random')[INCHI_KEY]
    for set_id in (0, 1)
)


## Initial setup: remove the enclosing triple quotes and run once if no model files exist

In [8]:
"""
dt = DecisionTreeClassifier(**{'criterion': 'gini', 'splitter': 'best', 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 3, 'class_weight': {0: 0.2520408163265306, 1: 0.7479591836734694}})
learner = ActiveLearner(estimator=dt, X_training=dataset0['x_t'], y_training=dataset0['y_t'])
dump_model_pkl(learner, 'dt', 0, 0)

dt = DecisionTreeClassifier(**{'criterion': 'gini', 'splitter': 'best', 'max_depth': 9, 'min_samples_split': 6, 'min_samples_leaf': 3, 'class_weight': {0: 0.2520408163265306, 1: 0.7479591836734694}})
learner = ActiveLearner(estimator=dt, X_training=dataset1['x_t'], y_training=dataset1['y_t'])
dump_model_pkl(learner, 'dt', 1, 0)

query_indices_set0 = []
query_indices_set1 = []

query_instances_set0 = []
query_instances_set1 = []

pickle.dump(query_indices_set0, Path('./dt/q_idx_set0.pkl').open('wb'))
pickle.dump(query_indices_set1, Path('./dt/q_idx_set1.pkl').open('wb'))

pickle.dump(query_instances_set0, Path('./dt/q_inst_set0.pkl').open('wb'))
pickle.dump(query_instances_set1, Path('./dt/q_inst_set1.pkl').open('wb'))

"""


## Set up model and data

In [9]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``, closing the file afterwards."""
    # NOTE(review): pickle can execute arbitrary code; only load trusted files.
    with Path(path).open('rb') as handle:
        return pickle.load(handle)


def _drop_queried(reagent_vols, stateset, queried_indices):
    """Remove previously queried rows from the reagent table and the stateset.

    The indices are applied one at a time, in the recorded order, each against
    the pool as it stands after the earlier removals.

    Returns the reduced ``(reagent_vols, stateset)`` pair. Neither input is
    mutated: ``DataFrame.drop`` and ``np.delete`` both return new objects.
    """
    for idx in queried_indices:
        reagent_vols = reagent_vols.drop(reagent_vols.index[idx])
        stateset = np.delete(stateset, idx, 0)
    return reagent_vols, stateset


# get_model_pkl yields a (learner, iteration) pair per set; which saved file it
# picks is decided inside custom_pipeline (presumably the latest -- confirm).
dt_set0, iteration_set0 = get_model_pkl('dt', 0)
dt_set1, iteration_set1 = get_model_pkl('dt', 1)

# Query history saved by earlier iterations.
query_indices_set0 = _load_pickle('./dt/q_idx_set0.pkl')
query_indices_set1 = _load_pickle('./dt/q_idx_set1.pkl')

query_instances_set0 = _load_pickle('./dt/q_inst_set0.pkl')
query_instances_set1 = _load_pickle('./dt/q_inst_set1.pkl')

# The stateset filename embeds today's date; build it once so both sets are
# guaranteed to read the same file.
stateset_path = f'./statesets/scaled_stateset_{datetime.today().date()}.csv'
x_stateset_set0 = np.genfromtxt(stateset_path, delimiter=',')
x_stateset_set1 = np.genfromtxt(stateset_path, delimiter=',')

rvol_raw_set0 = pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')
rvol_raw_set1 = pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')

# Keep only the reagent-volume columns. The explicit .copy() makes each table
# an independent frame, so trimming rows from it cannot touch (or warn about)
# the raw frame it was selected from.
ss_reagent_vols_set0 = rvol_raw_set0[[col for col in rvol_raw_set0.columns if 'Reagent' in col]].copy()
ss_reagent_vols_set1 = rvol_raw_set1[[col for col in rvol_raw_set1.columns if 'Reagent' in col]].copy()

# Remove every row that an earlier iteration already queried, per set.
ss_reagent_vols_set0, x_stateset_set0 = _drop_queried(
    ss_reagent_vols_set0, x_stateset_set0, query_indices_set0
)
ss_reagent_vols_set1, x_stateset_set1 = _drop_queried(
    ss_reagent_vols_set1, x_stateset_set1, query_indices_set1
)

Loading dt/dt_set0_it0_20201203-064124.pkl : Iteration 0
Loading dt/dt_set1_it0_20201203-064124.pkl : Iteration 0


## DT Set 0 Get reagent volumes for next experiment

In [10]:
# Ask the set-0 learner which row of the remaining stateset to try next.
next_query_set0 = dt_set0.query(x_stateset_set0)
query_index, query_instance = next_query_set0

# Record the pick in the set-0 query history.
query_instances_set0.append(query_instance)
query_indices_set0.append(query_index)

# Show the reagent volumes of the selected row.
next_volumes_set0 = ss_reagent_vols_set0.iloc[query_index]
print(next_volumes_set0)

    Reagent1 (ul)  Reagent2 (ul)  Reagent3 (ul)  Reagent4 (ul)  Reagent5 (ul)  \
10              7             70            143              0              0   

    Reagent6 (ul)  Reagent7 (ul)  Reagent8 (ul)  Reagent9 (ul)  
10              0            280              0              0  


## DT Set 0 Enter Crystal result
### Note: If crystal score is 4 => result = 1
### If crystal score is 1, 2 or 3 => result = 0

In [11]:
# Binary outcome of the set-0 experiment: 1 for crystal score 4, 0 for 1-3.
result = 0

# Guard against entering the raw crystal score instead of the binary label.
if result not in (0, 1):
    raise ValueError(f'result must be 0 or 1, got {result!r}')

# Teach with the most recent entry of the set-0 query history rather than the
# shared `query_instance` global: the set-1 query cell assigns that same name,
# so running the cells out of order would teach this learner the wrong row.
dt_set0.teach(query_instances_set0[-1], [result])

## DT Set 1 Get reagent volumes for next experiment

In [12]:
# Ask the set-1 learner which row of the remaining stateset to try next.
next_query_set1 = dt_set1.query(x_stateset_set1)
query_index, query_instance = next_query_set1

# Record the pick in the set-1 query history.
query_instances_set1.append(query_instance)
query_indices_set1.append(query_index)

# Show the reagent volumes of the selected row.
next_volumes_set1 = ss_reagent_vols_set1.iloc[query_index]
print(next_volumes_set1)

    Reagent1 (ul)  Reagent2 (ul)  Reagent3 (ul)  Reagent4 (ul)  Reagent5 (ul)  \
70             39             60            121              0              0   

    Reagent6 (ul)  Reagent7 (ul)  Reagent8 (ul)  Reagent9 (ul)  
70              0            280              0              0  


## DT Set 1 Enter Crystal result
### Note: If crystal score is 4 => result = 1
### If crystal score is 1, 2 or 3 => result = 0

In [14]:
# Binary outcome of the set-1 experiment: 1 for crystal score 4, 0 for 1-3.
result = 0

# Guard against entering the raw crystal score instead of the binary label.
if result not in (0, 1):
    raise ValueError(f'result must be 0 or 1, got {result!r}')

# Teach with the most recent entry of the set-1 query history rather than the
# shared `query_instance` global: the set-0 query cell assigns that same name,
# so running the cells out of order would teach this learner the wrong row.
dt_set1.teach(query_instances_set1[-1], [result])

## Finish Iteration Set 0

In [10]:
# Advance the set-0 iteration counter and save the learner under the new number.
# NOTE: re-running this cell increments the counter again and saves another copy.
iteration_set0 += 1
dump_model_pkl(dt_set0, 'dt', 0, iteration_set0)

# Persist the set-0 query history. The context managers flush and close each
# file deterministically instead of leaving the handles to the garbage collector.
with Path('./dt/q_idx_set0.pkl').open('wb') as idx_file:
    pickle.dump(query_indices_set0, idx_file)
with Path('./dt/q_inst_set0.pkl').open('wb') as inst_file:
    pickle.dump(query_instances_set0, inst_file)

## Finish Iteration Set 1

In [11]:
# Advance the set-1 iteration counter and save the learner under the new number.
# NOTE: re-running this cell increments the counter again and saves another copy.
iteration_set1 += 1
dump_model_pkl(dt_set1, 'dt', 1, iteration_set1)

# Persist the set-1 query history. The context managers flush and close each
# file deterministically instead of leaving the handles to the garbage collector.
with Path('./dt/q_idx_set1.pkl').open('wb') as idx_file:
    pickle.dump(query_indices_set1, idx_file)
with Path('./dt/q_inst_set1.pkl').open('wb') as inst_file:
    pickle.dump(query_instances_set1, inst_file)