In [100]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [101]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import olympus
from olympus.datasets import Dataset, list_datasets
from olympus.emulators import Emulator
from olympus.models import BayesNeuralNet
from olympus.surfaces import Surface
from olympus.scalarizers import Scalarizer
from olympus.campaigns import Campaign
from olympus.planners import Planner
from olympus.evaluators import Evaluator

In [15]:

dataset = Dataset(kind='snar')

In [16]:
dataset.data

Unnamed: 0,residence_time,ratio,concentration,temperature,impurity
0,1.24,2.43,0.361,64.8,2.06
1,0.81,4.59,0.484,71.6,1.94
2,1.49,3.31,0.194,79.4,2.03
3,1.04,1.27,0.334,80.7,1.89
4,1.96,3.19,0.229,84.2,1.37
...,...,...,...,...,...
61,0.50,2.49,0.500,139.6,0.78
62,0.50,3.01,0.500,140.0,0.95
63,0.50,2.76,0.500,140.0,0.86
64,0.50,3.28,0.500,140.0,1.03


In [17]:
list_datasets()

['agnp',
 'alkox',
 'autoam',
 'benzylation',
 'colors_bob',
 'colors_n9',
 'crossed_barrel',
 'dye_lasers',
 'fullerenes',
 'hplc',
 'oer_plate_3496',
 'oer_plate_3851',
 'oer_plate_3860',
 'oer_plate_4098',
 'p3ht',
 'perovskites',
 'photo_pce10',
 'photo_wf3',
 'redoxmers',
 'snar',
 'suzuki',
 'thin_film']

In [18]:
dataset = Dataset(kind='redoxmers')

In [19]:
dataset.data

Unnamed: 0,r1_label,r3_label,r4_label,r5_label,abs_lam_diff,ered,gsol
0,R1_0,R3_0,R4_0,R5_0,39.96,1.684123,-0.681801
1,R1_0,R3_0,R4_0,R5_1,63.92,1.963624,-0.711542
2,R1_0,R3_0,R4_0,R5_2,51.76,2.044655,-0.8874
3,R1_0,R3_0,R4_0,R5_3,36.93,1.731604,-0.710235
4,R1_0,R3_0,R4_0,R5_4,53.79,1.844226,-0.748112
...,...,...,...,...,...,...,...
1402,R1_1,R3_7,R4_7,R5_6,28.6,1.953747,-0.645258
1403,R1_1,R3_7,R4_7,R5_7,12.22,1.921802,-0.576308
1404,R1_1,R3_7,R4_7,R5_8,25.32,1.895517,-0.614102
1405,R1_1,R3_7,R4_7,R5_9,44.07,2.129904,-0.731976


In [20]:
dataset._targets
dataset.target_names
dataset.feature_names
dataset.targets_dim 
dataset.features_dim

4

In [21]:
dataset.train_set
dataset.test_set
dataset.train_set_features
dataset.train_set_targets

Unnamed: 0,abs_lam_diff,ered,gsol
0,39.96,1.684123,-0.681801
2,51.76,2.044655,-0.8874
3,36.93,1.731604,-0.710235
4,53.79,1.844226,-0.748112
5,24.07,1.831274,-0.740439
...,...,...,...
1402,28.6,1.953747,-0.645258
1403,12.22,1.921802,-0.576308
1404,25.32,1.895517,-0.614102
1405,44.07,2.129904,-0.731976


## Test MOO runs on fully categorical dataset (`dye_lasers`)

In [74]:
dataset = Dataset(kind='dye_lasers')

In [75]:
dataset.data.head()

Unnamed: 0,frag_a,frag_b,frag_c,peak_score,spectral_overlap,fluo_rate
0,OB(O)c1ccnc(F)c1,C[N+]12CC(=O)O[B-]1(c1cccc(Br)c1-n1c3ccccc3c3c...,Brc1cc(Br)c2ccccc2c1,0.504702,0.087858,0.001269
1,OB(O)c1ccnc(F)c1,C[N+]12CC(=O)O[B-]1(c1cccc(Br)c1-n1c3ccccc3c3c...,Clc1cc(Br)sc1Br,0.149585,0.061804,0.0082
2,OB(O)c1ccnc(F)c1,C[N+]12CC(=O)O[B-]1(c1cccc(Br)c1-n1c3ccccc3c3c...,Cc1nc2c(Br)c3nsnc3c(Br)c2nc1C,0.001274,0.028725,0.000253
3,OB(O)c1ccnc(F)c1,C[N+]12CC(=O)O[B-]1(c1cccc(Br)c1-n1c3ccccc3c3c...,Brc1ccccc1Oc1ccccc1Br,0.737431,0.082928,0.006326
4,OB(O)c1ccnc(F)c1,C[N+]12CC(=O)O[B-]1(c1cccc(Br)c1-n1c3ccccc3c3c...,CC(C)(C)c1ccc2c(Br)c3ccccc3c(Br)c2c1,0.41934,0.387919,0.063529


In [76]:
dataset.goal, dataset.target_names

(['maximize', 'minimize', 'maximize'],
 ['peak_score', 'spectral_overlap', 'fluo_rate'])

In [77]:
print(dataset.value_space)

Continuous (name='peak_score', low=0.0, high=1.0, is_periodic=False)
Continuous (name='spectral_overlap', low=0.0, high=1.0, is_periodic=False)
Continuous (name='fluo_rate', low=0.0, high=1.0, is_periodic=False)


In [78]:
# initialize planner
planner = Planner(kind='RandomSearch', goal='minimize')
planner.set_param_space(dataset.param_space)

# initialize scalarizing function
tolerances = [0.2, 0.5, 0.5]
absolutes = [False, False, False]
goals = ['max', 'min', 'max']

scalarizer = Scalarizer(
    kind='Chimera', 
    value_space=dataset.value_space,  
    tolerances=tolerances, 
    absolutes=absolutes,
    goals=goals,
)

campaign = Campaign()
campaign.set_param_space(dataset.param_space)
campaign.set_value_space(dataset.value_space)

In [79]:
evaluator = Evaluator(
    planner=planner, 
    emulator=dataset,
    campaign=campaign,
    scalarizer=scalarizer,
)

In [80]:
evaluator.optimize(num_iter=5)

PARAMS :  ['OB(O)c1ccc(F)c2ccccc12' 'C[N+]12CC(=O)O[B-]1(c1cc3cc(Br)ccc3o1)OC(=O)C2'
 'C[Si](C)(c1ccc(Br)cc1)c1ccc(Br)cc1']
PARAM TYPE :  <class 'numpy.ndarray'>
PROCESSED PARAMS :  [array(['OB(O)c1ccc(F)c2ccccc12',
       'C[N+]12CC(=O)O[B-]1(c1cc3cc(Br)ccc3o1)OC(=O)C2',
       'C[Si](C)(c1ccc(Br)cc1)c1ccc(Br)cc1'], dtype='<U46')]
frag_a frag_a OB(O)c1ccc(F)c2ccccc12 <class 'numpy.str_'>
frag_b frag_b C[N+]12CC(=O)O[B-]1(c1cc3cc(Br)ccc3o1)OC(=O)C2 <class 'numpy.str_'>
frag_c frag_c C[Si](C)(c1ccc(Br)cc1)c1ccc(Br)cc1 <class 'numpy.str_'>
RET VALUES :  [ParamVector(peak_score = 0.0145553912437697, spectral_overlap = 0.6044614505435763, fluo_rate = 0.5646149852616343)]
 
PARAMS :  ['OB(O)c1cc(-n2c3ccccc3c3ccccc32)cc(-n2c3ccccc3c3ccccc32)c1'
 'C[N+]12CC(=O)O[B-]1(c1c(F)cccc1Br)OC(=O)C2'
 'CCCCC(CC)CC1(CC(CC)CCCC)c2cc(I)ccc2-c2ccc(I)cc21']
PARAM TYPE :  <class 'numpy.ndarray'>
PROCESSED PARAMS :  [array(['OB(O)c1cc(-n2c3ccccc3c3ccccc32)cc(-n2c3ccccc3c3ccccc32)c1',
       'C[N+]12CC(=O)O[B-

## Single-objective continuous optimzation through `Evaluator.optimize()`

In [52]:
emulator = Emulator(dataset='snar', model='BayesNeuralNet')

planner = Planner(kind='RandomSearch', goal=emulator.goal)
planner.set_param_space(emulator.param_space)

campaign = Campaign()
campaign.set_param_space(emulator.param_space)

[0;37m[INFO] Loading emulator using a BayesNeuralNet model for the dataset snar...
[0m

  trainable=trainable)
  trainable=trainable)


In [53]:
evaluator = Evaluator(
    planner=planner,
    emulator=emulator,
    campaign=campaign,
)

In [54]:
evaluator.optimize(num_iter=5)

In [55]:
campaign

<Campaign (dataset=snar, model=BayesNeuralNet, planner=RandomSearch, num_iter=5)>

## Single-objective categorical optimization through `Evaluator.optimize()`

In [59]:
dataset = Dataset(kind='perovskites')

planner = Planner(kind='RandomSearch', goal=dataset.goal)
planner.set_param_space(dataset.param_space)

campaign = Campaign()
campaign.set_param_space(dataset.param_space)

In [60]:
evaluator = Evaluator(
    planner=planner,
    emulator=dataset,
    campaign=campaign,
)

In [61]:
evaluator.optimize(num_iter=5)

PARAMS :  ['propylammonium' 'Sn' 'F']
PARAM TYPE :  <class 'numpy.ndarray'>
PROCESSED PARAMS :  [array(['propylammonium', 'Sn', 'F'], dtype='<U14')]
organic organic propylammonium <class 'numpy.str_'>
cation cation Sn <class 'numpy.str_'>
anion anion F <class 'numpy.str_'>
RET VALUES :  [ParamVector(hse_gap = 4.8004)]
 
PARAMS :  ['propylammonium' 'Sn' 'Br']
PARAM TYPE :  <class 'numpy.ndarray'>
PROCESSED PARAMS :  [array(['propylammonium', 'Sn', 'Br'], dtype='<U14')]
organic organic propylammonium <class 'numpy.str_'>
cation cation Sn <class 'numpy.str_'>
anion anion Br <class 'numpy.str_'>
RET VALUES :  [ParamVector(hse_gap = 2.798)]
 
PARAMS :  ['methylammonium' 'Pb' 'Cl']
PARAM TYPE :  <class 'numpy.ndarray'>
PROCESSED PARAMS :  [array(['methylammonium', 'Pb', 'Cl'], dtype='<U14')]
organic organic methylammonium <class 'numpy.str_'>
cation cation Pb <class 'numpy.str_'>
anion anion Cl <class 'numpy.str_'>
RET VALUES :  [ParamVector(hse_gap = 3.424)]
 
PARAMS :  ['trimethylammonium'

## Single-objective categorical optimization through low level interface

In [64]:
dataset = Dataset(kind='perovskites')

planner = Planner(kind='RandomSearch', goal=dataset.goal)
planner.set_param_space(dataset.param_space)

campaign = Campaign()
campaign.set_param_space(dataset.param_space)

budget = 24

In [65]:
for i in range(budget):
    
    sample = planner.recommend(campaign.observations)
    
    measurement = dataset.run(sample)
    
    print(sample, measurement)

PARAMS :  ParamVector(organic = tetramethylammonium, cation = Pb, anion = Br)
PARAM TYPE :  <class 'olympus.objects.object_parameter_vector.ObjectParameterVector'>
PROCESSED PARAMS :  [array(['tetramethylammonium', 'Pb', 'Br'], dtype='<U19')]
organic organic tetramethylammonium <class 'numpy.str_'>
cation cation Pb <class 'numpy.str_'>
anion anion Br <class 'numpy.str_'>
RET VALUES :  [3.186]
 
ParamVector(organic = tetramethylammonium, cation = Pb, anion = Br) [3.186]
PARAMS :  ParamVector(organic = tetramethylammonium, cation = Pb, anion = Br)
PARAM TYPE :  <class 'olympus.objects.object_parameter_vector.ObjectParameterVector'>
PROCESSED PARAMS :  [array(['tetramethylammonium', 'Pb', 'Br'], dtype='<U19')]
organic organic tetramethylammonium <class 'numpy.str_'>
cation cation Pb <class 'numpy.str_'>
anion anion Br <class 'numpy.str_'>
RET VALUES :  [3.186]
 
ParamVector(organic = tetramethylammonium, cation = Pb, anion = Br) [3.186]
PARAMS :  ParamVector(organic = tetramethylammonium,

## Multiobjective fully categorical optimization with lower-level interface

In [122]:
dataset = Dataset(kind='dye_lasers')

In [123]:
# initialize planner
planner = Planner(kind='RandomSearch', goal='minimize')
planner.set_param_space(dataset.param_space)

# initialize scalarizing function
tolerances = [0.2, 0.5, 0.5]
absolutes = [False, False, False]
goals = ['max', 'min', 'max']

scalarizer = Scalarizer(
    kind='Chimera', 
    value_space=dataset.value_space,  
    tolerances=tolerances, 
    absolutes=absolutes,
    goals=goals,
)

campaign = Campaign()
campaign.set_param_space(dataset.param_space)
campaign.set_value_space(dataset.value_space)

print(campaign.value_space)

Continuous (name='peak_score', low=0.0, high=1.0, is_periodic=False)
Continuous (name='spectral_overlap', low=0.0, high=1.0, is_periodic=False)
Continuous (name='fluo_rate', low=0.0, high=1.0, is_periodic=False)


In [124]:
# evaluator = Evaluator(
#     planner=planner, 
#     emulator=dataset,
#     campaign=campaign,
#     scalarizer=scalarizer,
# )

budget = 5

In [125]:
for i in range(budget):
    samples = planner.recommend(campaign.scalarized_observations)
    
    measurement = dataset.run(samples, return_paramvector=True)
    print(samples)
    print(measurement)
    print('\n\n')
    campaign.add_and_scalarize(samples, measurement[0], scalarizer)

PARAMS :  ParamVector(frag_a = OB(O)c1ccnc(-n2c3ccccc3c3ccccc32)c1, frag_b = C[N+]12CC(=O)O[B-]1(c1sccc1Br)OC(=O)C2, frag_c = C[Si](C)(c1ccc(Br)cc1)c1ccc(Br)cc1)
PARAM TYPE :  <class 'olympus.objects.object_parameter_vector.ObjectParameterVector'>
PROCESSED PARAMS :  [array(['OB(O)c1ccnc(-n2c3ccccc3c3ccccc32)c1',
       'C[N+]12CC(=O)O[B-]1(c1sccc1Br)OC(=O)C2',
       'C[Si](C)(c1ccc(Br)cc1)c1ccc(Br)cc1'], dtype='<U38')]
frag_a frag_a OB(O)c1ccnc(-n2c3ccccc3c3ccccc32)c1 <class 'numpy.str_'>
frag_b frag_b C[N+]12CC(=O)O[B-]1(c1sccc1Br)OC(=O)C2 <class 'numpy.str_'>
frag_c frag_c C[Si](C)(c1ccc(Br)cc1)c1ccc(Br)cc1 <class 'numpy.str_'>
RET VALUES :  [ParamVector(peak_score = 0.7967581324451161, spectral_overlap = 0.0673027384897604, fluo_rate = 0.0112739026465739)]
 
ParamVector(frag_a = OB(O)c1ccnc(-n2c3ccccc3c3ccccc32)c1, frag_b = C[N+]12CC(=O)O[B-]1(c1sccc1Br)OC(=O)C2, frag_c = C[Si](C)(c1ccc(Br)cc1)c1ccc(Br)cc1)
[ParamVector(peak_score = 0.7967581324451161, spectral_overlap = 0.0673027