In [None]:
import sys
sys.path.insert(0, '../')

import molecule_vae

# Weight files for the four model variants (relative paths under ../weights).
grammar_weights = "../weights/GrammarVAE_L128.hdf5"
two_tower_grammar_weights = "../weights/Two_tower_GrammarVAE_L128.hdf5"
character_weights = "../weights/CharVAE_L128.hdf5"
two_tower_character_weights = "../weights/Two_tower_CharVAE_L128.hdf5"

# Instantiate every variant with a 128-dimensional latent representation.
# The construction order (grammar, two-tower grammar, character, two-tower
# character) is kept as-is in case model loading is order-sensitive.
grammar_model = molecule_vae.GrammarModel(
    grammar_weights,
    latent_rep_size=128,
)
two_tower_grammar_model = molecule_vae.GrammarModel(
    two_tower_grammar_weights,
    latent_rep_size=128,
    two_tower=True,
)
character_model = molecule_vae.CharacterModel(
    character_weights,
    latent_rep_size=128,
)
two_tower_character_model = molecule_vae.CharacterModel(
    two_tower_character_weights,
    latent_rep_size=128,
    two_tower=True,
)

In [None]:
from tqdm import tqdm
import rdkit
from rdkit import Chem

# Default number of times each batch of latent points is decoded.
decode_attempts = 100


def decode_from_latent_space(latent_points, model, attempts=None):
    """Estimate the fraction of decoded strings that RDKit accepts as SMILES.

    The same batch of latent points is decoded ``attempts`` times and every
    decoded string is checked with ``Chem.MolFromSmiles``.

    Args:
        latent_points: Batch of latent vectors, passed unchanged to
            ``model.decode``.
        model: Object exposing ``decode(latent_points)`` that returns an
            iterable of strings, one per latent point.
        attempts: Number of decode passes over ``latent_points``. ``None``
            (the default) uses the module-level ``decode_attempts``.

    Returns:
        float: ``valid / decoded`` over all passes, or ``0.0`` when nothing
        was decoded (empty ``latent_points`` or a non-positive ``attempts``).
    """
    if attempts is None:
        attempts = decode_attempts

    num_valid = 0
    num_decoded = 0
    for _ in tqdm(range(attempts), leave=False):
        for smile in model.decode(latent_points):
            # An empty string is replaced by a non-SMILES sentinel so that it
            # is counted as invalid -- presumably needed because RDKit parses
            # '' as an (empty) molecule rather than returning None.
            if smile == '':
                smile = 'Sequence too long'
            num_decoded += 1
            if Chem.MolFromSmiles(smile) is not None:
                num_valid += 1

    # Avoid ZeroDivisionError when no molecule was decoded at all.
    if num_decoded == 0:
        return 0.0
    # float() keeps this a true division even under Python 2.
    return float(num_valid) / num_decoded

In [None]:
import numpy as np

# Model whose decoder is evaluated in this cell.
model = two_tower_grammar_model

# Experiment configuration.
seed = 0               # fixed so that Restart & Run All draws the same latent points
n_samples = 1000       # total number of latent points to draw
batch_size = 100       # latent points decoded together per batch
latent_rep_size = 128  # must match the latent_rep_size the model was built with
# Number of batches; any remainder of n_samples / batch_size is dropped.
epochs = n_samples // batch_size

# Seed NumPy's global RNG, which produces the latent draws below.
# NOTE(review): whether model.decode also samples from this RNG is not visible
# here -- if it uses another source of randomness, seed that one as well.
np.random.seed(seed)

# Despite the name, n_valid collects the per-batch *fraction* of valid decodes.
n_valid = []
for i in tqdm(range(epochs)):
    # Draw a batch of latent points from a standard normal distribution.
    z1 = np.random.normal(scale=1, size=(batch_size, latent_rep_size))
    frac_valid = decode_from_latent_space(z1, model)
    n_valid.append(frac_valid)
    # Running mean / standard deviation over the batches processed so far.
    print(np.average(n_valid), np.std(n_valid))

# Final mean and standard deviation of the per-batch valid fractions.
print(np.average(n_valid), np.std(n_valid))