In [1]:
from pathlib import Path
import sys
import pandas as pd
import numpy as np
#import qgrid
import pickle
import sklearn
from datetime import datetime
from sklearn.neighbors import KNeighborsClassifier
from modAL.models import ActiveLearner

# Make the parent of the current working directory importable so that imports
# placed after this block can resolve modules located there.
package_path = Path.cwd().parent
# sys.path holds strings, so the membership test must use the string form;
# comparing the Path object itself never matches and would append a duplicate
# entry every time this cell is re-run.
if str(package_path) not in sys.path:
    sys.path.append(str(package_path))

from utils.dataset_class import Phase3DataSet, Setting
from custom_pipeline import get_model_pkl, dump_model_pkl

In [2]:
# Load the raw CSV into a DataFrame.
training_raw = pd.read_csv('../data/raw/0057.perovskitedata_DRPFeatures_2020-07-02.csv')

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'phase3_dataset.pkl' when it comes from a trusted source.
# The context manager closes the file handle, which a bare open() call passed
# straight to pickle.load never does explicitly.
with open('phase3_dataset.pkl', 'rb') as dataset_file:
    phase3_training = pickle.load(dataset_file)

## Set up data

In [3]:
# Fetch the 'ALHk' / 'random' dataset twice (second argument 0, then 1) and keep
# only the entry stored under this key (presumably an InChIKey - confirm).
entry_key = 'JMXLWMIFDJCGBV-UHFFFAOYSA-N'
dataset0, dataset1 = (
    phase3_training.get_dataset('ALHk', second_arg, 'random')[entry_key]
    for second_arg in (0, 1)
)


## Initial setup: remove the surrounding triple quotes and run this cell if no model files exist

In [10]:
# Disabled bootstrap code: everything between the triple quotes is a plain
# string literal, so none of it runs. As written, the enclosed code would
#   * build a KNeighborsClassifier (n_neighbors=1, leaf_size=1, p=1) wrapped in
#     an ActiveLearner for dataset0 and again for dataset1,
#   * pass each learner to dump_model_pkl with iteration number 0, and
#   * pickle four empty lists as the query-index / query-instance history
#     files under ./knn/.
# NOTE(review): the ./knn/ directory presumably has to exist beforehand - confirm.
"""
knn = KNeighborsClassifier(**{'n_neighbors': 1, 'leaf_size': 1, 'p': 1})
learner = ActiveLearner(estimator=knn, X_training=dataset0['x_t'], y_training=dataset0['y_t'])
dump_model_pkl(learner, 'knn', 0, 0)

knn = KNeighborsClassifier(**{'n_neighbors': 1, 'leaf_size': 1, 'p': 1})
learner = ActiveLearner(estimator=knn, X_training=dataset1['x_t'], y_training=dataset1['y_t'])
dump_model_pkl(learner, 'knn', 1, 0)


query_indices_set0 = []
query_indices_set1 = []

query_instances_set0 = []
query_instances_set1 = []

pickle.dump(query_indices_set0, Path('./knn/q_idx_set0.pkl').open('wb'))
pickle.dump(query_indices_set1, Path('./knn/q_idx_set1.pkl').open('wb'))

pickle.dump(query_instances_set0, Path('./knn/q_inst_set0.pkl').open('wb'))
pickle.dump(query_instances_set1, Path('./knn/q_inst_set1.pkl').open('wb'))
"""



## Set up model and data

In [12]:
def _load_pickle(path):
    """Return the object unpickled from ``path``; the file is closed on exit."""
    # NOTE(review): pickle.load can run arbitrary code from the file, so these
    # paths must only ever point at trusted files.
    with Path(path).open('rb') as handle:
        return pickle.load(handle)


def _flat_positions(query_indices):
    """Flatten a query history into a 1-D integer array of row positions.

    Entries may be plain ints or the index arrays that get appended after a
    ``query()`` call (a saved history can look like ``[array([0])]``). Used
    raw, a list of arrays acts as a multi-dimensional indexer; flattening
    makes both forms behave the same. An empty history gives an empty
    integer array.
    """
    return np.array(
        [int(pos) for entry in query_indices for pos in np.ravel(entry)],
        dtype=int,
    )


def _drop_queried(x_stateset, reagent_vols, query_indices):
    """Return ``(x_stateset, reagent_vols)`` without the rows in ``query_indices``.

    Neither input is modified: ``np.delete`` and ``DataFrame.drop`` both return
    new objects, which also avoids an in-place drop on a frame that was
    derived by column selection from another frame.
    """
    positions = _flat_positions(query_indices)
    return (
        np.delete(x_stateset, positions, 0),
        reagent_vols.drop(reagent_vols.index[positions]),
    )


# Learners and their current iteration numbers for both sets.
knn_set0, iteration_set0 = get_model_pkl('knn', 0)
knn_set1, iteration_set1 = get_model_pkl('knn', 1)

# Query history saved by the "Finish Iteration" cells.
query_indices_set0 = _load_pickle('./knn/q_idx_set0.pkl')
query_indices_set1 = _load_pickle('./knn/q_idx_set1.pkl')

query_instances_set0 = _load_pickle('./knn/q_inst_set0.pkl')
query_instances_set1 = _load_pickle('./knn/q_inst_set1.pkl')

# The stateset file name contains today's date, so the file for the current
# day has to exist before this cell is run.
stateset_path = f'./statesets/scaled_stateset_{datetime.today().date()}.csv'
x_stateset_set0 = np.genfromtxt(stateset_path, delimiter=',')
x_stateset_set1 = np.genfromtxt(stateset_path, delimiter=',')

rvol_raw_set0 = pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')
rvol_raw_set1 = pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')

# Keep only the columns whose name contains 'Reagent'.
ss_reagent_vols_set0 = rvol_raw_set0[[col for col in rvol_raw_set0.columns if 'Reagent' in col]]
ss_reagent_vols_set1 = rvol_raw_set1[[col for col in rvol_raw_set1.columns if 'Reagent' in col]]

# Remove every previously queried row from both the feature matrix and the
# reagent-volume table so the two stay aligned row for row.
# NOTE(review): indices appended after a query are positions in the pool as
# it was at query time, while here all stored indices are removed in one go
# from the freshly loaded stateset. Confirm this matches how the history was
# recorded (the disabled loop below removes them one at a time instead).
x_stateset_set0, ss_reagent_vols_set0 = _drop_queried(
    x_stateset_set0, ss_reagent_vols_set0, query_indices_set0
)
x_stateset_set1, ss_reagent_vols_set1 = _drop_queried(
    x_stateset_set1, ss_reagent_vols_set1, query_indices_set1
)

# Disabled alternative kept for reference (string literal, never executed).
# Because it is the last expression of the cell, the notebook echoes it as the
# cell output.
"""
for idx in query_indices_set0:
    ss_reagent_vols_set0.drop(ss_reagent_vols_set0.index[idx], inplace=True)
    x_stateset_set0 = np.delete(x_stateset_set0, idx, 0)
    
for idx in query_indices_set1:
    ss_reagent_vols_set1.drop(ss_reagent_vols_set1.index[idx], inplace=True)
    x_stateset_set1 = np.delete(x_stateset_set1, idx, 0)
"""

Loading knn/knn_set0_it1_20201203-133116.pkl : Iteration 1
Loading knn/knn_set1_it1_20201203-133117.pkl : Iteration 1


'\nfor idx in query_indices_set0:\n    ss_reagent_vols_set0.drop(ss_reagent_vols_set0.index[idx], inplace=True)\n    x_stateset_set0 = np.delete(x_stateset_set0, idx, 0)\n    \nfor idx in query_indices_set1:\n    ss_reagent_vols_set1.drop(ss_reagent_vols_set1.index[idx], inplace=True)\n    x_stateset_set1 = np.delete(x_stateset_set1, idx, 0)\n'

[0, 1]
[array([ 0.41898329,  0.42731908, -1.69238563, 45.4689449 ,  5.17627073,
       -1.38138883,  0.58229367,  0.87416943, -1.09363898,  1.88302572,
        0.27109044,  1.89542677, -0.52156015, -0.92160176, -0.73585604,
       -0.27446385, -0.26972947,  0.16083379, -0.19574007, -2.10688602,
       -1.90816642, -1.99757251, -1.46767537, -0.48342343, -1.63443555,
       -1.64141015, -1.23899532, -2.20166869, -1.79895047, -0.95107561,
       -0.22080768, -0.92436418,  0.45675948, -1.38178365, -1.42735295,
       -0.25048972, -0.63286396, -0.86182278, -1.96785213, -0.63286396,
       -1.98146843, -0.28077616, -0.95901859, -0.99658496,  0.        ,
        0.        ,  1.        ,  0.        ,  1.        ,  1.        ]), array([ 4.18983288e-01,  3.58017722e-01, -1.54462570e+00,  4.54689449e+01,
        4.89909108e+00, -1.29113477e+00,  4.40108443e-01,  8.36887414e-01,
       -1.55892868e-01,  1.95967100e+00,  5.96724578e-01,  2.00206078e+00,
       -5.50307246e-01, -6.31748929e-01, -6.5

## KNN Set 0 Get reagent volumes for next experiment

In [15]:
# Ask the set-0 learner to pick the next row from x_stateset_set0.
query_index, query_instance = knn_set0.query(x_stateset_set0)
# Print the returned index first, then the reagent volumes at that position.
print(query_index, ss_reagent_vols_set0.iloc[query_index[0]], sep='\n')

[0]
Reagent1 (ul)      2
Reagent2 (ul)     40
Reagent3 (ul)    178
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)    280
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 2, dtype: int64


In [None]:
# Run only once the experiment is confirmed: add the latest query to the
# set-0 history. Both values are stored exactly as query() returned them.
query_indices_set0 += [query_index]
query_instances_set0 += [query_instance]

## KNN Set 0 Enter Crystal result
### Note: If crystal score is 4 => result = 1
### If crystal score is 1, 2 or 3 => result = 0

In [6]:
# Outcome of the set-0 experiment, entered by hand:
# 1 when the crystal score is 4, 0 when it is 1, 2 or 3.
result = 0
observed_labels = [result]
knn_set0.teach(query_instance, observed_labels)

## KNN Set 1 Get reagent volumes for next experiment

In [6]:
# Display the set-1 query history as currently held in memory.
query_indices_set1

[array([0])]

In [14]:
# Ask the set-1 learner to pick the next row from x_stateset_set1.
query_index, query_instance = knn_set1.query(x_stateset_set1)
# Print the returned index first, then the reagent volumes at that position.
print(query_index, ss_reagent_vols_set1.iloc[query_index[0]], sep='\n')

[0]
Reagent1 (ul)      2
Reagent2 (ul)     40
Reagent3 (ul)    178
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)    280
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 2, dtype: int64


In [None]:
# Add the latest query to the set-1 history. Both values are stored exactly
# as query() returned them.
query_indices_set1 += [query_index]
query_instances_set1 += [query_instance]

## KNN Set 1 Enter Crystal result
### Note: If crystal score is 4 => result = 1
### If crystal score is 1, 2 or 3 => result = 0

In [10]:
# Outcome of the set-1 experiment, entered by hand:
# 1 when the crystal score is 4, 0 when it is 1, 2 or 3.
result = 0
observed_labels = [result]
knn_set1.teach(query_instance, observed_labels)

## Finish Iteration Set 0

In [11]:
# Advance the set-0 iteration counter and hand the learner to dump_model_pkl
# together with the new iteration number.
# NOTE: re-running this cell increments the counter again.
iteration_set0 += 1
dump_model_pkl(knn_set0, 'knn', 0, iteration_set0)

# Persist the set-0 query history. The context managers flush and close each
# file deterministically instead of leaving the handle from Path.open('wb')
# to be cleaned up by the garbage collector.
with Path('./knn/q_idx_set0.pkl').open('wb') as idx_file:
    pickle.dump(query_indices_set0, idx_file)
with Path('./knn/q_inst_set0.pkl').open('wb') as inst_file:
    pickle.dump(query_instances_set0, inst_file)

## Finish Iteration Set 1

In [12]:
# Advance the set-1 iteration counter and hand the learner to dump_model_pkl
# together with the new iteration number.
# NOTE: re-running this cell increments the counter again.
iteration_set1 += 1
dump_model_pkl(knn_set1, 'knn', 1, iteration_set1)

# Persist the set-1 query history. The context managers flush and close each
# file deterministically instead of leaving the handle from Path.open('wb')
# to be cleaned up by the garbage collector.
with Path('./knn/q_idx_set1.pkl').open('wb') as idx_file:
    pickle.dump(query_indices_set1, idx_file)
with Path('./knn/q_inst_set1.pkl').open('wb') as inst_file:
    pickle.dump(query_instances_set1, inst_file)