In [1]:
# import required packages
import pandas as pd
from rdkit import Chem
import tensorflow as tf 
# Let TensorFlow grow GPU memory on demand instead of reserving it all up front.
# The setting is applied to every detected GPU (TensorFlow requires memory
# growth to be the same across GPUs), and on a CPU-only machine the empty list
# simply skips the loop instead of failing on physical_devices[0].
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

# Fix TensorFlow's global random seed so repeated runs are comparable.
tf.random.set_seed(42)
import deepchem as dc 
from scripts.graphConvModel import GraphConvModel

# Prediction demonstration

1. To predict a given fluorophore, add the molecule's name (or ID) and its SMILES string to the CSV file `To_Predict.csv` (within the `data` folder), following the template (columns `Name` and `SMILES`). Add one row per molecule to predict several molecules at once.
2. The predictions made are then saved to `Predictions.csv` in this folder.

In [2]:
# Rebuild the graph-convolution model, then load its saved weights.
# NOTE(review): these hyperparameters presumably have to match the ones used
# when the checkpoint in "Trained_Model" was written -- confirm against the
# training script before changing any of them.
current_tasks = ["Eg"]
model_kwargs = dict(
    n_tasks=len(current_tasks),
    graph_conv_layers=[512, 512, 512, 512],
    dense_layers=[128, 128, 128],
    dropout=0.01,
    learning_rate=0.001,
    batch_size=10,
    uncertainty=False,
    model_dir="Trained_Model",  # <-- Trained model directory
)
model = GraphConvModel(**model_kwargs)

# Restore from the last entry of the checkpoint list reported by the model
# (presumably the most recent checkpoint -- verify the list ordering).
checkpoints = model.get_checkpoints()
model.restore(checkpoints[-1])

# featurizing structures from SMILES
df = pd.read_csv("data/To_Predict.csv")

# Chem.MolFromSmiles returns None for a SMILES string it cannot parse.
# Non-string cells (e.g. NaN read from an empty CSV field) are treated as
# unparseable as well, rather than being handed to RDKit.
df["MOLS"] = [
    Chem.MolFromSmiles(smi) if isinstance(smi, str) else None
    for smi in df["SMILES"]
]

# Fail early and name the offending rows, so a bad input line is reported
# directly instead of surfacing later inside the featurizer or the model.
unparsed_names = df.loc[df["MOLS"].isna(), "Name"].tolist()
if unparsed_names:
    raise ValueError(
        "Could not parse the SMILES of: {}".format(
            ", ".join(map(str, unparsed_names))
        )
    )

# Convert the RDKit molecules to graph-convolution features and wrap them in
# a dataset whose ids are the molecule names.
graph_featurizer = dc.feat.graph_features.ConvMolFeaturizer()
graph_list = graph_featurizer.featurize(df["MOLS"])
dataset = dc.data.NumpyDataset(graph_list, ids = df["Name"])

# Run the restored model on the featurized molecules and store the result
# next to the inputs.
pred = model.predict(dataset)
df["Predicted Eg (eV)"] = pred

# Write only the identifying columns and the prediction to disk. The DataFrame
# index is written too (to_csv default), as an unnamed first column.
df.to_csv("Predictions.csv", columns=["Name", "SMILES", "Predicted Eg (eV)"])

# Echo the predictions as a small text table: "<name> | <value>".
print("Predicted energy gaps (eV)")
for name, row in zip(df['Name'].values, pred):
    # The first entry of each prediction row is rounded to 4 decimals.
    rounded = str(round(row[0], 4))
    print("{:<8} | {:<4}".format(name, rounded))

Predicted energy gaps (eV)
NDA_100  | 2.463
NDA_101  | 2.8933
NDA_102  | 2.1954
NDAD_801 | 1.3457
NDAD_802 | 1.6287
NDAD_803 | 1.2586
