In [1]:
from rdkit import RDLogger
import pandas as pd
import numpy as np
from dgllife.utils import smiles_to_bigraph
from dgllife.utils import CanonicalAtomFeaturizer, CanonicalBondFeaturizer
from dgllife.utils import ConcatFeaturizer
from dgllife.utils import atom_degree, atom_is_aromatic, atomic_number, atom_is_in_ring, BaseAtomFeaturizer
from dgllife.utils import bond_type_one_hot, bond_is_conjugated, bond_is_in_ring, BaseBondFeaturizer
import torch
from tqdm.notebook import tqdm
from dgl.data.utils import load_graphs
from dgl.data.utils import save_graphs
from dgl.dataloading.pytorch import GraphDataLoader
from torch.utils.data import DataLoader
from rdkit import Chem
from torch.utils.data import Dataset
import torch.nn as nn
from dgl.nn.pytorch import Set2Set
#from dgl.nn.pytorch import MPNNGNN
#from dgllife.model.model_zoo.mpnn_predictor import MPNNPredictor
from dgllife.model.gnn.mpnn import MPNNGNN
from dgllife.model.readout.mlp_readout import MLPNodeReadout
import wandb

Using backend: pytorch


In [2]:
# Lookup table used by get_E_pred: keys come from the 'symbol' column of
# indiv_energies.csv and values from its 'E' column.
elements = pd.read_csv('indiv_energies.csv').set_index('symbol')['E'].to_dict()

def get_E_pred(smiles):
    """Sum the per-element energies of every atom in a molecule.

    The SMILES string is parsed with RDKit, hydrogens are made explicit with
    ``Chem.AddHs``, and the value stored in the module-level ``elements``
    mapping for each atom's symbol is accumulated.

    Parameters
    ----------
    smiles : str
        SMILES string describing the molecule.

    Returns
    -------
    number
        Sum of ``elements[symbol]`` over all atoms of the hydrogen-complete
        molecule (``0`` if the molecule has no atoms).

    Raises
    ------
    ValueError
        If RDKit cannot parse ``smiles``.
    KeyError
        If an atom's symbol has no entry in ``elements``.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # MolFromSmiles reports a parse failure by returning None; handing that
        # to AddHs would fail with an opaque argument-type error, so fail here
        # with a message that names the offending input instead.
        raise ValueError('Could not parse SMILES: %r' % (smiles,))
    mol = Chem.AddHs(mol)
    return sum(elements[atom.GetSymbol()] for atom in mol.GetAtoms())

In [3]:
# Silence RDKit's log output so warnings emitted during featurization do not
# flood the notebook; incompletely featurized molecules are removed later.
RDLogger.DisableLog('rdApp.*')

# Read every per-dataset CSV, stack them row-wise and drop any row that
# contains a missing value.
dataset_frames = []
for dataset in ('B', 'C', 'D', 'G', 'H', 'I'):
    dataset_frames.append(pd.read_csv('../data/dataset-%s-E0.csv' % dataset))

dat = pd.concat(dataset_frames, axis=0).dropna()
del dataset_frames

smiles, Y = dat['SMILES'], dat['E_0']
# Difference between the tabulated E_0 and the summed per-element energies
# returned by get_E_pred for the same molecule.
E_diff = Y - smiles.apply(get_E_pred)

# The combined frame is no longer needed once the columns are extracted.
del dat

In [4]:
def featurize_atoms(mol):
    """Apply a newly constructed ``CanonicalAtomFeaturizer`` to ``mol``.

    A fresh featurizer instance is created on every call; whatever that
    instance returns for ``mol`` is passed back unchanged.
    """
    return CanonicalAtomFeaturizer()(mol)

In [5]:
def featurize_bonds(mol):
    """Apply a newly constructed ``CanonicalBondFeaturizer`` to ``mol``.

    A fresh featurizer instance is created on every call; whatever that
    instance returns for ``mol`` is passed back unchanged.
    """
    return CanonicalBondFeaturizer()(mol)

In [9]:
# NOTE(review): disabled step. When uncommented, it converts every entry of
# `smiles` into a graph with smiles_to_bigraph (using featurize_atoms /
# featurize_bonds and explicit hydrogens) and writes the resulting list to
# 'canonical_feat_graphs' via save_graphs. Presumably it is commented out so
# that previously saved graphs are reloaded instead of being recomputed --
# confirm against the later cells.
# graphs = []

# for m in tqdm(smiles):
#     g = smiles_to_bigraph(m, node_featurizer=featurize_atoms, edge_featurizer=featurize_bonds, explicit_hydrogens=True)
#     graphs.append(g)

# save_graphs('canonical_feat_graphs', graphs)