In [8]:
from pathlib import Path
import sys
import pandas as pd
import numpy as np
import pickle
import sklearn
from datetime import datetime, timedelta
import torch

package_path = Path.cwd().parent
if package_path not in sys.path:
    sys.path.append(str(package_path))

from utils.dataset_class import Phase3DataSet, Setting
from phase3.scripts.custom_pipeline import get_model_pkl, dump_model_pkl
from models.meta.platipus_class import Platipus
from hpc_scripts.param_generator import get_all_params
from models.meta.init_params import init_params

In [9]:
x_stateset = {}
rvol_raw = {}
ss_reagent_vols = {}
all_params = get_all_params(cross_validate=False)
params = all_params[172]
amine = ['JMXLWMIFDJCGBV-UHFFFAOYSA-N']
train_params = {}
platipus = {}
device = {}

for i in range(2):
    x_stateset[i] = np.genfromtxt(f'./statesets/scaled_stateset_{datetime.today().date() - timedelta(days=1)}.csv', delimiter=',')
    rvol_raw[i] = pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')
    ss_reagent_vols[i] = rvol_raw[i][[col for col in rvol_raw[i].columns if 'Reagent' in col]] 
    
    train_params[i] = init_params(params)
    train_params[i]['gpu_id'] = i
    platipus[i] = Platipus(train_params[i], amine=amine,
                            model_name=train_params[i]['model_name'],
                            epoch_al=True)
    platipus[i].load_model('./plat_trained_model/testing/drp_chem_10shot_4000.pkl')
    platipus[i].current_epoch = 4000
    device[i] = torch.device(f"cuda:{i}")


Meta learning rate = 0.001
Getting FULL dataset
Holding out ['CALQKRVFTWDYDG-UHFFFAOYSA-N', 'KOAGKPNEVYEZDU-UHFFFAOYSA-N', 'FCTHQYIDLRRROX-UHFFFAOYSA-N', 'JMXLWMIFDJCGBV-UHFFFAOYSA-N']
Number of reactions in training set 1477
Number of successful reactions in the training set 215
Generating training batches
Generating testing batches for testing! DO NOT RUN IF YOU SEE THIS LINE!
Meta learning rate = 0.001
Getting FULL dataset
Holding out ['CALQKRVFTWDYDG-UHFFFAOYSA-N', 'KOAGKPNEVYEZDU-UHFFFAOYSA-N', 'FCTHQYIDLRRROX-UHFFFAOYSA-N', 'JMXLWMIFDJCGBV-UHFFFAOYSA-N']
Number of reactions in training set 1477
Number of successful reactions in the training set 215
Generating training batches
Generating testing batches for testing! DO NOT RUN IF YOU SEE THIS LINE!


In [6]:
"""
query_indices_set0 = [10022, 1441]
query_indices_set1 = [17833, 9275]
query_instances_set0 = []
query_instances_set1 = []
for idx in query_indices_set0:
    query_instances_set0.append(x_stateset_set0[idx])
for idx in query_indices_set1:
    query_instances_set1.append(x_stateset_set0[idx])
"""

In [3]:
results = {}
results[0] = {10022:1, 1441:0, 6127:0, 9716:0, 8205:0, 7177:0, 17269:1, 15434:0, 10040:1, 9958:0}
results[1] = {17833:0, 9275:0, 6470:0, 2378:1, 6890:0, 17171:0, 16771:0, 14533:0, 5180:1, 13862:0}

xss_torch0 = {}
xss_torch1 = {}
x_t = {}
y_t = {}
for i in range(2):
    iters, all_data, all_labels, x_train, y_train, x_v, y_v = platipus[i].setup_active_learning(i, 'JMXLWMIFDJCGBV-UHFFFAOYSA-N')
    x_t[i] = x_train
    y_t[i] = y_train
    xss_torch0[i] = torch.from_numpy(x_stateset[i][:10000]).float().to(device[i])
    xss_torch1[i] = torch.from_numpy(x_stateset[i][10000:]).float().to(device[i])
    for exp_num in results[i]:
        
        if exp_num >= 10000:
            index = exp_num-10000
            x_t[i] = torch.cat((x_t[i], xss_torch1[i][index].view(1, 50)))
            y_t[i] = torch.cat((y_t[i], torch.from_numpy(np.array([results[i][exp_num]])).to(device[i])))
            xss_torch1[i] = torch.cat([xss_torch1[i][0:index], xss_torch1[i][index + 1:]])
            ss_reagent_vols[i].drop(ss_reagent_vols[i].index[exp_num], inplace=True)
        else:
            index = exp_num
            x_t[i] = torch.cat((x_t[i], xss_torch0[i][exp_num].view(1, 50)))
            y_t[i] = torch.cat((y_t[i], torch.from_numpy(np.array([results[i][exp_num]])).to(device[i])))
            xss_torch0[i] = torch.cat([xss_torch0[i][0:index], xss_torch0[i][index + 1:]])
            ss_reagent_vols[i].drop(ss_reagent_vols[i].index[exp_num], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [4]:
value0, index0 = platipus[0].phase3_active_learning(x_t[0], y_t[0], xss_torch0[0], debug=True)
print(index0.item(), value0.item())
del value0
torch.cuda.empty_cache()

tensor([0.6230, 0.6185, 0.6179,  ..., 0.5766, 0.5826, 0.7755], device='cuda:0',
       grad_fn=<MaxBackward0>)
9953 0.5765551924705505


In [5]:
value0, index0 = platipus[0].phase3_active_learning(x_t[0], y_t[0], xss_torch1[0], debug=True)
print(index0.item(), value0.item())
del value0
torch.cuda.empty_cache()

tensor([0.7809, 0.7791, 0.7773,  ..., 0.6167, 0.6142, 0.6172], device='cuda:0',
       grad_fn=<MaxBackward0>)
642 0.5791086554527283


In [7]:
value0, index0 = platipus[1].phase3_active_learning(x_t[1], y_t[1], xss_torch0[1], debug=True)
print(index0.item(), value0.item())
del value0
torch.cuda.empty_cache()

tensor([0.7019, 0.7321, 0.7256,  ..., 0.5237, 0.5296, 0.6998], device='cuda:1',
       grad_fn=<MaxBackward0>)
9720 0.522106945514679


In [8]:
value0, index0 = platipus[1].phase3_active_learning(x_t[1], y_t[1], xss_torch1[1], debug=True)
print(index0.item(), value0.item())
del value0
torch.cuda.empty_cache()

tensor([0.6865, 0.6806, 0.6740,  ..., 0.5732, 0.5703, 0.5679], device='cuda:1',
       grad_fn=<MaxBackward0>)
3857 0.5000147223472595


In [11]:
import random
random.randint(0, 20000)

17168

In [7]:
value0, index0 = platipus_set0.phase3_active_learning(x_t, y_t, xss_torch0)

In [8]:
print(value0, index0)

tensor(0.5261, device='cuda:0', grad_fn=<MinBackward0>) tensor(9716, device='cuda:0')


In [9]:
del value0
del index0
torch.cuda.empty_cache()

In [6]:
del xss_torch0
torch.cuda.empty_cache()

In [10]:
#xss_torch1 = torch.from_numpy(x_stateset_set0[10000:]).float().to(device_set0)
value1, index1 = platipus_set0.phase3_active_learning(x_t, y_t, xss_torch1)

In [11]:
print(value1, index1)
del value1
torch.cuda.empty_cache()

tensor(nan, device='cuda:0', grad_fn=<MinBackward0>) tensor(0, device='cuda:0')


In [6]:
ss_reagent_vols[0].iloc[9953]

Reagent1 (ul)     88
Reagent2 (ul)     10
Reagent3 (ul)    312
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)     90
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 9958, dtype: int64

In [9]:
ss_reagent_vols[1].iloc[13857]

Reagent1 (ul)    113
Reagent2 (ul)    140
Reagent3 (ul)    197
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)     50
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 13862, dtype: int64

In [9]:
#value, index = platipus.phase3_active_learning(x_t, y_t, xss_torch)
value, index

(tensor(0.5000, device='cuda:1', grad_fn=<MinBackward0>),
 tensor(7833, device='cuda:1'))

In [7]:
x_t = torch.cat((x_t, xss_torch[7833].view(1, 50)))

In [8]:
y_t = torch.cat((y_t, torch.from_numpy(np.array([0])).to(device)))

In [9]:
y_t.shape

torch.Size([11])

In [10]:
value, index = platipus.phase3_active_learning(x_t, y_t, xss_torch)

In [11]:
print(value, index)

tensor(0.5556, device='cuda:1', grad_fn=<MinBackward0>) tensor(9275, device='cuda:1')


In [16]:
set_id = 1 
iters, all_data, all_labels, x_t, y_t, x_v, y_v = platipus[0].setup_active_learning(set_id, 'JMXLWMIFDJCGBV-UHFFFAOYSA-N')

In [18]:
xss_torch0 = torch.from_numpy(x_stateset[0][:10000]).float().to(device[0])
value0, index0 = platipus[0].phase3_active_learning(x_t, y_t, xss_torch0)

In [19]:
print(value0, index0)
del value0
torch.cuda.empty_cache()

tensor(0.5000, device='cuda:0', grad_fn=<MinBackward0>) tensor(8205, device='cuda:0')


In [19]:
xss_torch1 = torch.from_numpy(x_stateset_set1[10000:]).float().to(device_set1)
value1, index1 = platipus_set1.phase3_active_learning(x_t, y_t, xss_torch1)

In [20]:
print(value1, index1)
del value1
torch.cuda.empty_cache()

tensor(0.5000, device='cuda:1', grad_fn=<MinBackward0>) tensor(6125, device='cuda:1')


In [20]:
rvol_raw =  pd.read_csv('./extra_data/Me2NH2I_10uL_stateset.link.csv')
ss_reagent_vols = rvol_raw[[col for col in rvol_raw.columns if 'Reagent' in col]]
ss_reagent_vols.iloc[8205]

Reagent1 (ul)     46
Reagent2 (ul)     20
Reagent3 (ul)    324
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)    110
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 8205, dtype: int64

## PLATIPUS SET 1

In [13]:
set_id = 1
iters, all_data, all_labels, x_t, y_t, x_v, y_v = platipus_set1.setup_active_learning(set_id, 'JMXLWMIFDJCGBV-UHFFFAOYSA-N')

In [14]:
xss_torch0 = torch.from_numpy(x_stateset_set0[:10000]).float().to(device_set1)
xss_torch1 = torch.from_numpy(x_stateset_set0[10000:]).float().to(device_set1)

In [15]:
results = {
    17833:0,
    9275:0,
    6470:0,
}
for exp_num in results:
    if exp_num >= 10000:
        x_t = torch.cat((x_t, xss_torch1[exp_num-10000].view(1, 50)))
        y_t = torch.cat((y_t, torch.from_numpy(np.array([results[exp_num]])).to(device_set1)))
    else:
        x_t = torch.cat((x_t, xss_torch1[exp_num].view(1, 50)))
        y_t = torch.cat((y_t, torch.from_numpy(np.array([results[exp_num]])).to(device_set1)))

In [16]:
value0, index0 = platipus_set1.phase3_active_learning(x_t, y_t, xss_torch0)
print(value0, index0)
del value0
del index0
torch.cuda.empty_cache()

tensor(0.5018, device='cuda:1', grad_fn=<MinBackward0>) tensor(2378, device='cuda:1')


In [17]:
value1, index1 = platipus_set1.phase3_active_learning(x_t, y_t, xss_torch1)
print(value1, index1)
del value1
torch.cuda.empty_cache()

tensor(0.6820, device='cuda:1', grad_fn=<MinBackward0>) tensor(589, device='cuda:1')


In [18]:
ss_reagent_vols_set0.iloc[2378]

Reagent1 (ul)      6
Reagent2 (ul)    280
Reagent3 (ul)      4
Reagent4 (ul)      0
Reagent5 (ul)      0
Reagent6 (ul)      0
Reagent7 (ul)    210
Reagent8 (ul)      0
Reagent9 (ul)      0
Name: 2378, dtype: int64

In [10]:
results = {}
results[0] = {10022:1, 1441:0, 6127:0, 9716:0, 8205:0, 7177:0, 17269:1, 15434:0, 10040:1, 9958:0}
results[1] = {17833:0, 9275:0, 6470:0, 2378:1, 6890:0, 17171:0, 16771:0, 14533:0, 5180:1, 13862:0}
phase3dataset = pickle.load(open('../data/phase3_dataset.pkl', 'rb'))
dataset0 = phase3dataset.get_dataset('ALHk', 0, 'random')['JMXLWMIFDJCGBV-UHFFFAOYSA-N']
dataset1 = phase3dataset.get_dataset('ALHk', 1, 'random')['JMXLWMIFDJCGBV-UHFFFAOYSA-N']

In [14]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, balanced_accuracy_score
def predict(x_t, y_t, set_id, all_data, all_labels): 
    phi = platipus[set_id].setup_weight_dist(x_t, y_t)
    prob_pred, labels_pred = platipus[set_id].predict(all_data, phi=phi, proba=True)
    correct = labels_pred == all_labels

    y_true = all_labels
    y_pred = labels_pred.detach().cpu().numpy()
    print(y_true, y_pred)
    print(prob_pred)
    cm = confusion_matrix(y_true, y_pred)
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    bcr = balanced_accuracy_score(y_true, y_pred)
    
    return accuracy, precision, recall, bcr

In [12]:
xss_torch0 = {}
xss_torch1 = {}
x_t = {}
y_t = {}
for i in range(2):
    iters, all_data, all_labels, x_train, y_train, x_v, y_v = platipus[i].setup_active_learning(i, 'JMXLWMIFDJCGBV-UHFFFAOYSA-N')
    x_t[i] = x_train
    y_t[i] = y_train
    xss_torch0[i] = torch.from_numpy(x_stateset[i][:10000]).float().to(device[i])
    xss_torch1[i] = torch.from_numpy(x_stateset[i][10000:]).float().to(device[i])
    for exp_num in results[i]:
        if exp_num >= 10000:
            index = exp_num-10000
            x_t[i] = torch.cat((x_t[i], xss_torch1[i][index].view(1, 50)))
            y_t[i] = torch.cat((y_t[i], torch.from_numpy(np.array([results[i][exp_num]])).to(device[i])))
        else:
            index = exp_num
            x_t[i] = torch.cat((x_t[i], xss_torch0[i][exp_num].view(1, 50)))
            y_t[i] = torch.cat((y_t[i], torch.from_numpy(np.array([results[i][exp_num]])).to(device[i])))

In [15]:
predict(x_t[0], y_t[0], 0, dataset0['x_v'], dataset0['y_v'])

[0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0
 1 0 0 0 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0] [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
tensor([0.6772, 0.6584, 0.5430, 0.7584, 0.6428, 0.6252, 0.5757, 0.6962, 0.6380,
        0.6023, 0.5885, 0.7045, 0.6839, 0.7567, 0.5814, 0.6329, 0.6521, 0.5508,
        0.5646, 0.6967, 0.6314, 0.6347, 0.6066, 0.5593, 0.6097, 0.6203, 0.6814,
        0.6769, 0.5656, 0.6008, 0.7260, 0.6049, 0.6703, 0.6096, 0.6033, 0.7606,
        0.6222, 0.7656, 0.5302, 0.5915, 0.5345, 0.6655, 0.7278, 0.6027, 0.5924,
        0.6385, 0.5791, 0.5876, 0.6252, 0.6611, 0.5443, 0.7558, 0.5634, 0.6753,
        0.5514, 0.5649, 0.6506, 0.6827, 0.5735, 0.7133, 0.6483, 0.5572, 0.7157,
        0.6248, 0.5687, 0.5831, 0.6426, 0.6908, 0.61

(0.15789473684210525, 0.15789473684210525, 1.0, 0.5)