# **Load models for comparison**

In [None]:
import sys
sys.path.insert(0, '../')

import molecule_vae

# Pretrained weight files, one per model variant (all stored under ../weights).
character_weights = "../weights/CharVAE_L128.hdf5"
two_tower_character_weights = "../weights/Two_tower_CharVAE_L128.hdf5"
grammar_weights = "../weights/GrammarVAE_L128.hdf5"
two_tower_grammar_weights = "../weights/Two_tower_GrammarVAE_L128.hdf5"

# Character-based models: the plain variant, then its two-tower counterpart.
character_model = molecule_vae.CharacterModel(
    character_weights,
    two_tower=False,
)
two_tower_character_model = molecule_vae.CharacterModel(
    two_tower_character_weights,
    two_tower=True,
)

# Grammar-based models, built in the same plain / two-tower order.
grammar_model = molecule_vae.GrammarModel(
    grammar_weights,
    two_tower=False,
)
two_tower_grammar_model = molecule_vae.GrammarModel(
    two_tower_grammar_weights,
    two_tower=True,
)

# **Load property data**

In [None]:
import pandas as pd

# Location of the property dataset (CSV with a 'smiles' column).
data_path = "../datasets/delaney.csv"

# Read the table and keep only the rows whose SMILES string has no literal '.'
# (regex=False makes '.' match the dot character, not "any character").
df = pd.read_csv(data_path)
df = df[~df['smiles'].str.contains('.', regex=False)]

# Inputs are the SMILES strings; targets are every column after the first one.
# NOTE(review): this assumes 'smiles' is the first column of the CSV — confirm.
smiles = df["smiles"].values
targets = df.iloc[:, 1:].values

# Display the filtered frame as the cell output.
df

# **Encoding datasets**

In [None]:
from molecule_vae import encode_smiles

# Each encode_smiles call receives a model plus the full smiles/targets arrays
# and hands back a (smiles, targets, latent representation) triple for that model.

# Grammar models: two-tower variant first, then the plain one.
(
    two_tower_grammar_smiles,
    two_tower_grammar_targets,
    two_tower_grammar_latent_rep,
) = encode_smiles(two_tower_grammar_model, smiles, targets)
(
    grammar_smiles,
    grammar_targets,
    grammar_latent_rep,
) = encode_smiles(grammar_model, smiles, targets)

# Character models: two-tower variant first, then the plain one.
(
    two_tower_char_smiles_list,
    two_tower_char_targets,
    two_tower_char_latent_rep,
) = encode_smiles(two_tower_character_model, smiles, targets)
(
    char_smiles,
    char_targets,
    char_latent_rep,
) = encode_smiles(character_model, smiles, targets)

# **Regression**

NOTE: run the following cells only if the prediction task is regression.

In [None]:
from prediction_utils import get_regression_loss

# Score every latent representation against the targets returned by the SAME
# encode_smiles call that produced it. The two-tower runs previously reused the
# plain models' targets, which misaligns rows if the two encoders keep different
# molecules. NOTE(review): confirm whether encode_smiles filters its inputs.
# All four calls use 128-dimensional latents and report RMSE.

# Grammar models: two-tower, then plain.
get_regression_loss(two_tower_grammar_latent_rep, two_tower_grammar_targets, latent_rep_size=128, loss_metric='rmse')
get_regression_loss(grammar_latent_rep, grammar_targets, latent_rep_size=128, loss_metric='rmse')

# Character models: two-tower, then plain.
get_regression_loss(two_tower_char_latent_rep, two_tower_char_targets, latent_rep_size=128, loss_metric='rmse')
get_regression_loss(char_latent_rep, char_targets, latent_rep_size=128, loss_metric='rmse')

# **Binary Classification**

NOTE: run the following cells only if the prediction task is binary classification.

In [None]:
from prediction_utils import get_ROC_score

# Score every latent representation against the targets returned by the SAME
# encode_smiles call that produced it. The two-tower runs previously reused the
# plain models' targets, which misaligns rows if the two encoders keep different
# molecules. NOTE(review): confirm whether encode_smiles filters its inputs.

# Grammar models: two-tower, then plain.
get_ROC_score(two_tower_grammar_latent_rep, two_tower_grammar_targets, latent_rep_size=128)
get_ROC_score(grammar_latent_rep, grammar_targets, latent_rep_size=128)

# Character models: two-tower, then plain.
get_ROC_score(two_tower_char_latent_rep, two_tower_char_targets, latent_rep_size=128)
get_ROC_score(char_latent_rep, char_targets, latent_rep_size=128)

# **Multi-label Classification**

NOTE: run the following cells only if the prediction task is multi-label classification.

In [None]:
from prediction_utils import get_multilabel_ROC_score

# Score every latent representation against the targets returned by the SAME
# encode_smiles call that produced it. The two-tower runs previously reused the
# plain models' targets, which misaligns rows if the two encoders keep different
# molecules. NOTE(review): confirm whether encode_smiles filters its inputs.
# NOTE(review): output_size=2 is hard-coded — presumably it must match the
# number of target columns in the loaded dataset; verify before running.

# Grammar models: two-tower, then plain.
get_multilabel_ROC_score(two_tower_grammar_latent_rep, two_tower_grammar_targets, latent_rep_size=128, output_size=2)
get_multilabel_ROC_score(grammar_latent_rep, grammar_targets, latent_rep_size=128, output_size=2)

# Character models: two-tower, then plain.
get_multilabel_ROC_score(two_tower_char_latent_rep, two_tower_char_targets, latent_rep_size=128, output_size=2)
get_multilabel_ROC_score(char_latent_rep, char_targets, latent_rep_size=128, output_size=2)