In [15]:
import matplotlib.pylab as plt
import numpy as np
import seaborn as sns; sns.set()
%matplotlib inline
from os.path import join
from IPython.display import clear_output, display

import keras
from keras import objectives
from keras.callbacks import EarlyStopping
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input, Multiply, Add
from keras.optimizers import Adam, Nadam
from keras.models import load_model

from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from random import shuffle
import pandas as pd
import random

#Keras build
from keras import backend as K
from keras.objectives import binary_crossentropy #objs or losses
from keras.models import Model
from keras.layers import Input, Dense, Lambda, Layer
from keras.layers.core import Dense, Activation, Flatten, RepeatVector
from keras.layers.wrappers import TimeDistributed
from keras.layers.recurrent import GRU
from keras.layers.convolutional import Convolution1D

#chem
from rdkit import Chem
from rdkit.Chem import AllChem as Chem
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit import DataStructs
from rdkit.Chem import Draw
from rdkit.Chem import MACCSkeys
import salty
import gains as genetic

# Reference tables read from ../data.
# Each *_coldic is the first CSV row turned into a {column: value} dict
# (presumably a colour map per category — confirm against the plotting helpers).
# NOTE: `salts` and `cations` are rebound later by the return_top_cations cells.
cations = pd.read_csv('../data/cations.csv')['smiles_string']
salts = pd.read_csv('../data/salts.csv')['smiles_string']
categories = pd.read_csv('../data/categories.csv')['category']
coldic = pd.read_csv('../data/coldic.csv').to_dict(orient='records')[0]
salt_coldic = pd.read_csv('../data/salt_coldic.csv').to_dict(orient='records')[0]
salt_categories = pd.read_csv('../data/salt_categories.csv')['category']
density_coldic = pd.read_csv('../data/density_coldic.csv').to_dict(orient='records')[0]
density_categories = pd.read_csv('../data/density_categories.csv')['category']

#supporting functions
# Put the parent directory first on sys.path so the local `scripts` package is importable.
import sys
sys.path.insert(0, '../')
# Star import: TwoMoleculeVAE, return_top_cations and generate_solvent_vae are used below
# but never imported by name in this notebook, so they presumably come from here.
from scripts import *

#training array info
import json

# NOTE(review): every generate_solvent_vae call below passes smile_max_length=62
# explicitly instead of this value — confirm which length the models expect.
smile_max_length = 105

# Anion/salt character table. The context manager closes the file handle
# (the previous bare open() left it open for the life of the kernel).
with open("../data/salt_char_to_index.json", "r") as f:
    ani_char_to_index = json.load(f)
ani_char_set = set(ani_char_to_index.keys())
ani_char_list = list(ani_char_to_index.keys())
ani_chars_in_dict = len(ani_char_list)
# Built from key order, not from the stored index values.
# NOTE(review): this only inverts ani_char_to_index if the JSON lists keys in index order — confirm.
ani_index_to_char = dict(enumerate(ani_char_list))

# Cation (GDB) character table, loaded the same way.
with open("../data/gdb_char_to_index.json", "r") as f:
    cat_char_to_index = json.load(f)
cat_char_set = set(cat_char_to_index.keys())
cat_char_list = list(cat_char_to_index.keys())
cat_chars_in_dict = len(cat_char_list)
cat_index_to_char = dict(enumerate(cat_char_list))

# The generic names used by the rest of the notebook alias the anion/salt tables;
# the cat_* tables are loaded but not referenced by the cells below.
char_to_index = ani_char_to_index
char_set = ani_char_set
char_list = ani_char_list
chars_in_dict = ani_chars_in_dict
index_to_char = ani_index_to_char
        
# Base two-molecule VAE. The same character set is passed for both positional
# arguments of create(), and the weights come from the gen3 "mix_pure" checkpoint.
gen3vae = TwoMoleculeVAE()
gen3vae.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3_2mol_1mil_GDB17_mix_pure_5.h5',
)

# Search by Property
The general setup:
1. Generate N structures with target Cp and p values from the VAE and the GA (and possibly from a VAE without QSPR, scored by a separate QSPR model, to show that the organization of the latent space is actually helping).
2. Calculate ground-truth values using QM/MD.
3. Compare the percentage of found structures that were close to the target for each method.

So the table is:

- **properties**: p, Cp
- **models**: VAE, VAE+QSPR, EA
- **number of**: total function calls, RDKit-sanitizable returned structures, returned Gaussian structures, returned MD systems, % error of prediction

The basic setup is here. However, we will want to discard structures whose Cp/p is far from that of our top 10 seed structures (high Cp, low p).

We seed all methods with the top 10 structures from our dataset. 

# No QSPR

In [16]:
# Number of candidates each search is asked for; passed as find=hits to every
# generate_solvent_vae call below (the runs report "1/1 found").
hits = 1

In [17]:
# Rebuilds the base model with the same arguments and weight file as the setup cell.
# NOTE(review): this section is titled "No QSPR" yet the model is created with
# qspr=True — confirm that the base checkpoint (untuned QSPR head) is what is meant.
gen3vae = TwoMoleculeVAE()
gen3vae.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3_2mol_1mil_GDB17_mix_pure_5.h5',
)

In [18]:
# Heat-capacity (cpt) seeds. With return_min_values=True the helper reports sorting salts in
# descending order and returning the minimum value of the top 10 unique cations
# (presumably the value unpacked into `target`).
# NOTE: this rebinds `salts` and `cations`, replacing the Series loaded from ../data in the setup cell.
model_ID = ['cpt']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Heat capacity at constant pressure, J/K/mol
total salts in training data:	1739
unique salts:	15
unique cations:	10
min/max values:	918.0000, 9000.0000

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [19]:
# Generate `hits` cpt candidate(s) with the base model; the printed row lists the
# seeds, the candidate, the attempt count and the QSPR estimates.
# NOTE(review): qspr=True is passed although this section is titled "No QSPR" — confirm.
df_cpt_no_qspr = generate_solvent_vae(
    gen3vae,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=['cpt_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed         CCCCC(CC)COC(=O)CC(C(=O)OCC(CC)CCCC)S(=O)(=O)[O-]
attempts                                                       324
candidate                                C=CC[N+](CCCC)(CCCC)CCCCC
cat seed                                  CCCC[N+](CCCC)(CCCC)CCCC
rdkit qspr                                                 1141.31
rdkit-md qspr                                                969.2
salt             C=CC[N+](CCCC)(CCCC)CCCCC.CCCCC(CC)COC(=O)CC(C...
temperature                                                 1.9401
vae qspr                                                  0.990878
Name: 0, dtype: object
1/1 found


In [20]:
# Thermal-conductivity seeds: descending sort, minimum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target for the next search cell.
model_ID = ['thermal_conductivity']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Thermal conductivity, W/m/K
total salts in training data:	148
unique salts:	23
unique cations:	10
min/max values:	0.1667, 0.2380

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [21]:
# Generate `hits` thermal-conductivity candidate(s) with the base model.
# NOTE(review): qspr=True is passed although this section is titled "No QSPR" — confirm.
df_thermal_no_qspr = generate_solvent_vae(
    gen3vae,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                           CCOP(=O)([O-])OCC
attempts                                          68
candidate                        C[n+]1ccn(CCCC#N)c1
cat seed                         CCCCCCn1cc[n+](c1)C
rdkit qspr                                      0.17
salt           C[n+]1ccn(CCCC#N)c1.CCOP(=O)([O-])OCC
temperature                                  1.45093
vae qspr                                     1.17376
Name: 0, dtype: object
1/1 found


In [22]:
# Viscosity seeds: with return_min_values=False the helper reports an ascending sort and returns
# the maximum value of the top 10 unique cations. Rebinds salts, cations and target.
model_ID = ['viscosity']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Viscosity, Pa&#8226;s
total salts in training data:	3277
unique salts:	18
unique cations:	10
min/max values:	0.0017, 0.0106

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [23]:
# Generate `hits` viscosity candidate(s) with the base model.
# NOTE(review): qspr=True is passed although this section is titled "No QSPR" — confirm.
df_viscosity_no_qspr = generate_solvent_vae(
    gen3vae,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                    CC(=O)[O-]
attempts                           695
candidate                 OC1=[N+]CCC1
cat seed               C[N+]1=C(CCC1)O
rdkit qspr                           0
salt           OC1=[N+]CCC1.CC(=O)[O-]
temperature                    1.74085
vae qspr                       1.08132
Name: 0, dtype: object
1/1 found


In [24]:
# Density seeds: ascending sort, maximum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['density']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Specific density, kg/m<SUP>3</SUP>
total salts in training data:	5631
unique salts:	17
unique cations:	10
min/max values:	871.3000, 962.7000

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [25]:
# Generate `hits` density candidate(s) with the base model.
# NOTE(review): qspr=True is passed although this section is titled "No QSPR" — confirm.
df_density_no_qspr = generate_solvent_vae(
    gen3vae,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=['density_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed                                                 [Cl-]
attempts                                                    84
candidate              CCCCCCCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC
cat seed            C(CCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC)CCCCC
rdkit qspr                                              885.91
rdkit-md qspr                                           932.64
salt             CCCCCCCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC.[Cl-]
temperature                                            1.20682
vae qspr                                              0.953953
Name: 0, dtype: object
1/1 found


# 10 Epoch

In [26]:
# One VAE per property, loaded from the weight files tagged `_10` (the "10 Epoch" section).
# NOTE: the 30- and 100-epoch sections rebind these same four names, so the search
# cells use whichever loading cell was executed most recently.
gen3vae_viscosity = TwoMoleculeVAE()
gen3vae_viscosity.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_viscosity_10.h5',
)

gen3vae_thermal = TwoMoleculeVAE()
gen3vae_thermal.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_thermal_conductivity_10.h5',
)

gen3vae_cpt = TwoMoleculeVAE()
gen3vae_cpt.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_cpt_10.h5',
)

gen3vae_density = TwoMoleculeVAE()
gen3vae_density.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_density_10.h5',
)

In [27]:
# Heat-capacity (cpt) seeds: descending sort, minimum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target for the next search cell.
model_ID = ['cpt']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Heat capacity at constant pressure, J/K/mol
total salts in training data:	1739
unique salts:	15
unique cations:	10
min/max values:	918.0000, 9000.0000

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [28]:
# Generate `hits` cpt candidate(s) with the cpt model currently bound to gen3vae_cpt
# (the `_10` weights when the notebook is run top to bottom).
df_cpt_10_qspr = generate_solvent_vae(
    gen3vae_cpt,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=['cpt_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed         CCCCC(CC)COC(=O)CC(C(=O)OCC(CC)CCCC)S(=O)(=O)[O-]
attempts                                                       314
candidate                   CCCCCCCCCCCCC[P+](CCCCC)(CCCCCC)CCCCCC
cat seed                C(CCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC)CCCCC
rdkit qspr                                                 1397.18
rdkit-md qspr                                              1673.22
salt             CCCCCCCCCCCCC[P+](CCCCC)(CCCCCC)CCCCCC.CCCCC(C...
temperature                                                1.64889
vae qspr                                                   668.473
Name: 0, dtype: object
1/1 found


In [29]:
# Thermal-conductivity seeds: descending sort, minimum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['thermal_conductivity']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Thermal conductivity, W/m/K
total salts in training data:	148
unique salts:	23
unique cations:	10
min/max values:	0.1667, 0.2380

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [30]:
# Generate `hits` thermal-conductivity candidate(s) with the model currently bound to
# gen3vae_thermal (the `_10` weights when the notebook is run top to bottom).
df_thermal_10_qspr = generate_solvent_vae(
    gen3vae_thermal,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                       C(#N)[N-]C#N
attempts                                301
candidate                   CNn1cc[n+](C)c1
cat seed                    CCn1cc[n+](c1)C
rdkit qspr                             0.17
salt           CNn1cc[n+](C)c1.C(#N)[N-]C#N
temperature                         1.92014
vae qspr                           0.219434
Name: 0, dtype: object
1/1 found


In [31]:
# Viscosity seeds: ascending sort, maximum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['viscosity']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Viscosity, Pa&#8226;s
total salts in training data:	3277
unique salts:	18
unique cations:	10
min/max values:	0.0017, 0.0106

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [32]:
# Generate `hits` viscosity candidate(s) with the model currently bound to
# gen3vae_viscosity (the `_10` weights when the notebook is run top to bottom).
df_viscosity_10_qspr = generate_solvent_vae(
    gen3vae_viscosity,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                       C(=C(C#N)C#N)=[N-]
attempts                                      347
candidate                         CC[NH+]1C=NCCC1
cat seed                          CC[NH+]1C=CN=C1
rdkit qspr                                   0.01
salt           CC[NH+]1C=NCCC1.C(=C(C#N)C#N)=[N-]
temperature                               1.78378
vae qspr                                0.0110406
Name: 0, dtype: object
1/1 found


In [33]:
# Density seeds: ascending sort, maximum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['density']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Specific density, kg/m<SUP>3</SUP>
total salts in training data:	5631
unique salts:	17
unique cations:	10
min/max values:	871.3000, 962.7000

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [34]:
# Generate `hits` density candidate(s) with the model currently bound to
# gen3vae_density (the `_10` weights when the notebook is run top to bottom).
df_density_10_qspr = generate_solvent_vae(
    gen3vae_density,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=['density_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed                                                CC(=O)[O-]
attempts                                                        57
candidate               CCCCCCCCCCCCC[P+](CCCCC)(CCCCCC)CCCCCCCCCC
cat seed                C(CCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC)CCCCC
rdkit qspr                                                   846.2
rdkit-md qspr                                               896.13
salt             CCCCCCCCCCCCC[P+](CCCCC)(CCCCCC)CCCCCCCCCC.CC(...
temperature                                                1.53536
vae qspr                                                   980.286
Name: 0, dtype: object
1/1 found


# 30 Epoch

In [35]:
# One VAE per property, loaded from the weight files tagged `_30` (the "30 Epoch" section).
# NOTE: these four names are also bound by the 10- and 100-epoch sections, so the
# search cells use whichever loading cell was executed most recently.
gen3vae_viscosity = TwoMoleculeVAE()
gen3vae_viscosity.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_viscosity_30.h5',
)

gen3vae_thermal = TwoMoleculeVAE()
gen3vae_thermal.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_thermal_conductivity_30.h5',
)

gen3vae_cpt = TwoMoleculeVAE()
gen3vae_cpt.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_cpt_30.h5',
)

gen3vae_density = TwoMoleculeVAE()
gen3vae_density.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_density_30.h5',
)

In [36]:
# Heat-capacity (cpt) seeds: descending sort, minimum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target for the next search cell.
model_ID = ['cpt']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Heat capacity at constant pressure, J/K/mol
total salts in training data:	1739
unique salts:	15
unique cations:	10
min/max values:	918.0000, 9000.0000

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [37]:
# Generate `hits` cpt candidate(s) with the cpt model currently bound to gen3vae_cpt
# (the `_30` weights when the notebook is run top to bottom).
df_cpt_30_qspr = generate_solvent_vae(
    gen3vae_cpt,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=['cpt_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed         CCCCC(CC)COC(=O)CC(C(=O)OCC(CC)CCCC)S(=O)(=O)[O-]
attempts                                                        21
candidate                  CCCCCCCCCCCCCCCCCC[P+](CCC)CCCCCCCCCCCC
cat seed                C(CCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC)CCCCC
rdkit qspr                                                 1361.07
rdkit-md qspr                                              1056.98
salt             CCCCCCCCCCCCCCCCCC[P+](CCC)CCCCCCCCCCCC.CCCCC(...
temperature                                               0.237603
vae qspr                                                   527.711
Name: 0, dtype: object
1/1 found


In [38]:
# Thermal-conductivity seeds: descending sort, minimum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['thermal_conductivity']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Thermal conductivity, W/m/K
total salts in training data:	148
unique salts:	23
unique cations:	10
min/max values:	0.1667, 0.2380

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [39]:
# Generate `hits` thermal-conductivity candidate(s) with the model currently bound to
# gen3vae_thermal (the `_30` weights when the notebook is run top to bottom).
df_thermal_30_qspr = generate_solvent_vae(
    gen3vae_thermal,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                     N#C[B-](C#N)(C#N)C#N
attempts                                      259
candidate                           CN1C=C[N+]=C1
cat seed                           Cn1cc[n+](c1)C
rdkit qspr                                   0.18
salt           CN1C=C[N+]=C1.N#C[B-](C#N)(C#N)C#N
temperature                                1.6051
vae qspr                                 0.196734
Name: 0, dtype: object
1/1 found


In [40]:
# Viscosity seeds: ascending sort, maximum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['viscosity']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Viscosity, Pa&#8226;s
total salts in training data:	3277
unique salts:	18
unique cations:	10
min/max values:	0.0017, 0.0106

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [41]:
# Generate `hits` viscosity candidate(s) with the model currently bound to
# gen3vae_viscosity (the `_30` weights when the notebook is run top to bottom).
df_viscosity_30_qspr = generate_solvent_vae(
    gen3vae_viscosity,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                      CC(=O)[O-]
attempts                              83
candidate                 CCN1C=C[N+]=C1
cat seed                 CCn1cc[n+](c1)C
rdkit qspr                          0.01
salt           CCN1C=C[N+]=C1.CC(=O)[O-]
temperature                      1.60287
vae qspr                       0.0767978
Name: 0, dtype: object
1/1 found


In [42]:
# Density seeds: ascending sort, maximum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['density']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Specific density, kg/m<SUP>3</SUP>
total salts in training data:	5631
unique salts:	17
unique cations:	10
min/max values:	871.3000, 962.7000

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [43]:
# Generate `hits` density candidate(s) with the model currently bound to
# gen3vae_density (the `_30` weights when the notebook is run top to bottom).
df_density_30_qspr = generate_solvent_vae(
    gen3vae_density,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=['density_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed                                      N#C[B-](C#N)(C#N)C#N
attempts                                                        50
candidate                   CCCCCCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC
cat seed                C(CCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC)CCCCC
rdkit qspr                                                     875
rdkit-md qspr                                               918.84
salt             CCCCCCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC.N#C[B-]...
temperature                                                1.66436
vae qspr                                                   657.062
Name: 0, dtype: object
1/1 found


# 100 Epoch

In [44]:
# One VAE per property, loaded from the weight files tagged `_100` (the "100 Epoch" section).
# NOTE: these four names are also bound by the 10- and 30-epoch sections, so the
# search cells use whichever loading cell was executed most recently.
gen3vae_viscosity = TwoMoleculeVAE()
gen3vae_viscosity.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_viscosity_100.h5',
)

gen3vae_thermal = TwoMoleculeVAE()
gen3vae_thermal.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_thermal_conductivity_100.h5',
)

gen3vae_cpt = TwoMoleculeVAE()
gen3vae_cpt.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_cpt_100.h5',
)

gen3vae_density = TwoMoleculeVAE()
gen3vae_density.create(
    char_set,
    char_set,
    qspr=True,
    weights_file='../models/gen3vae_density_100.h5',
)

In [47]:
# Heat-capacity (cpt) seeds: descending sort, minimum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target for the next search cell.
model_ID = ['cpt']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Heat capacity at constant pressure, J/K/mol
total salts in training data:	1739
unique salts:	15
unique cations:	10
min/max values:	918.0000, 9000.0000

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [48]:
# Generate `hits` cpt candidate(s) with the cpt model currently bound to gen3vae_cpt
# (the `_100` weights when the notebook is run top to bottom).
df_cpt_100_qspr = generate_solvent_vae(
    gen3vae_cpt,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=['cpt_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed                                        C(CS(=O)(=O)[O-])N
attempts                                                       152
candidate                   CCCCCCC(CCCCCCCC[P+](C)CCCC)C(CCC)CCCC
cat seed                C(CCCCCCCC[P+](CCCCCC)(CCCCCC)CCCCCC)CCCCC
rdkit qspr                                                 1316.97
rdkit-md qspr                                              1714.68
salt             CCCCCCC(CCCCCCCC[P+](C)CCCC)C(CCC)CCCC.C(CS(=O...
temperature                                                1.53985
vae qspr                                                   637.359
Name: 0, dtype: object
1/1 found


In [49]:
# Thermal-conductivity seeds: descending sort, minimum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['thermal_conductivity']
salts, cations, target = return_top_cations(model_ID, return_min_values=True)

Thermal conductivity, W/m/K
total salts in training data:	148
unique salts:	23
unique cations:	10
min/max values:	0.1667, 0.2380

salts sorted in descending order and the minimum value of the top 10 unique cations was returned


In [50]:
# Generate `hits` thermal-conductivity candidate(s) with the model currently bound to
# gen3vae_thermal (the `_100` weights when the notebook is run top to bottom).
df_thermal_100_qspr = generate_solvent_vae(
    gen3vae_thermal,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=True,  # presumably keeps only candidates predicted above the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                        N#C[B-](C#N)(C#N)C#N
attempts                                         137
candidate                           CCCCN1C=C[N+]=C1
cat seed                           CCCCn1cc[n+](c1)C
rdkit qspr                                      0.18
salt           CCCCN1C=C[N+]=C1.N#C[B-](C#N)(C#N)C#N
temperature                                  1.39111
vae qspr                                    0.179599
Name: 0, dtype: object
1/1 found


In [51]:
# Viscosity seeds: ascending sort, maximum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['viscosity']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Viscosity, Pa&#8226;s
total salts in training data:	3277
unique salts:	18
unique cations:	10
min/max values:	0.0017, 0.0106

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [52]:
# Generate `hits` viscosity candidate(s) with the model currently bound to
# gen3vae_viscosity (the `_100` weights when the notebook is run top to bottom).
df_viscosity_100_qspr = generate_solvent_vae(
    gen3vae_viscosity,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=None,  # no 'rdkit-md qspr' row is printed for this run
    verbose=1,
)

ani seed                       CCC(=O)[O-]
attempts                               125
candidate                  C[N+]1=C(O)CCO1
cat seed                   C[N+]1=C(CCC1)O
rdkit qspr                            0.01
salt           C[N+]1=C(O)CCO1.CCC(=O)[O-]
temperature                       0.967897
vae qspr                        0.00336071
Name: 0, dtype: object
1/1 found


In [53]:
# Density seeds: ascending sort, maximum of the top 10 unique cations returned
# (per the helper's printed summary). Rebinds salts, cations and target.
model_ID = ['density']
salts, cations, target = return_top_cations(model_ID, return_min_values=False)

Specific density, kg/m<SUP>3</SUP>
total salts in training data:	5631
unique salts:	17
unique cations:	10
min/max values:	871.3000, 962.7000

salts sorted in ascending order and the maximum value of the top 10 unique cations was returned


In [54]:
# Generate `hits` density candidate(s) with the model currently bound to
# gen3vae_density (the `_100` weights when the notebook is run top to bottom).
df_density_100_qspr = generate_solvent_vae(
    gen3vae_density,
    char_to_index,
    smile_max_length=62,  # NOTE(review): setup cell sets smile_max_length = 105 — confirm 62 is intended
    salts=salts,
    model_ID=model_ID,
    target=target,
    qspr=True,
    find=hits,
    optimalCutOff=target,
    greaterThanCutOff=False,  # presumably keeps only candidates predicted below the cut-off — confirm
    md_model=['density_4'],  # runs with md_model set print an extra 'rdkit-md qspr' row
    verbose=1,
)

ani seed                                    CCCCCCCCOP(=O)[O-]
attempts                                                    24
candidate                           CCCCCCCCCCC=[N+](CCCC)CCCC
cat seed                              CCCC[P+](CCCC)(CCCC)CCCC
rdkit qspr                                              872.54
rdkit-md qspr                                           908.34
salt             CCCCCCCCCCC=[N+](CCCC)CCCC.CCCCCCCCOP(=O)[O-]
temperature                                           0.192577
vae qspr                                               1062.92
Name: 0, dtype: object
1/1 found


In [None]:
# df_viscosity_no_qspr.to_csv('../data/df_gen3vae_viscosity_no_qspr_91102_function_calls', index=False)