In [1]:
import numpy as np  # sometimes needed to avoid mkl-service error
import sys
import os
import argparse
import logging
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.loggers import CSVLogger, WandbLogger
from pytorch_lightning.plugins import DDPPlugin
from pytorch_lightning.utilities import rank_zero_only
import torch
from torchmdnet.module import LNNP
from torchmdnet import datasets, priors, models
from torchmdnet.data import DataModule
from torchmdnet.models import output_modules
from torchmdnet.models.utils import rbf_class_mapping, act_class_mapping
from torchmdnet.utils import LoadFromFile, LoadFromCheckpoint, save_argparse, number
from pathlib import Path
import wandb
import json
import pandas as pd
from rdkit.Chem import AllChem
import copy
from rdkit.Geometry import Point3D
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.ipython_3d = True
import py3Dmol
from rdkit.Chem import rdDepictor
from rdkit.Chem import rdDistGeom
import rdkit

In [2]:
# Restore the run configuration from the JSON dump of the command-line
# arguments and hand it to the data module.
with Path('commandline_args.txt').open('r') as args_file:
    args = json.load(args_file)

data = DataModule(args)
# Prepare the underlying dataset, then build the splits for the "fit" stage.
data.prepare_data()
data.setup("fit")

Processing...
100%|██████████| 500/500 [00:04<00:00, 100.04it/s]
Done!


0
train 400, val 50, test 37


  rank_zero_warn(
computing mean and std:   0%|          | 0/4 [00:00<?, ?it/s]


In [3]:
def print_atoms(mol):
    """Print the element symbols of ``mol``'s atoms as one comma-separated line.

    The symbols are emitted in the order in which ``mol.GetAtoms()`` yields
    the atoms, which makes the output handy for comparing atom orderings
    between two molecules.
    """
    print(",".join(atom.GetSymbol() for atom in mol.GetAtoms()))

In [4]:
import random

# Spot-check ten randomly chosen entries: print the element symbols of the
# reference molecule and of its noisy counterpart so their atom orderings can
# be compared line by line.
for i in range(10):
    rand = random.randrange(0, 400)
    # Index the dataset once per draw. Looking the entry up separately for
    # `.mol` and `.noisy_mol` repeats the lookup and, if entries are built on
    # access (not visible here -- TODO confirm), could pair attributes that
    # come from two separately built entries.
    entry = data.dataset_maybe_noisy[rand]
    mol = entry.mol
    noisy_mol = entry.noisy_mol
    print_atoms(mol)
    print_atoms(noisy_mol)

C,C,C,C,C,O,H,H,H,H,H,H,H,H,H,H
C,C,C,C,C,O,H,H,H,H,H,H,H,H,H,H
O,N,C,C,O,C,H,H,H,H,H
O,N,C,C,O,C,H,H,H,H,H
N,C,C,C,N,H,H,H,H
N,C,C,C,N,H,H,H,H
O,C,C,C,O,C,H,H,H,H,H,H,H,H
O,C,C,C,O,C,H,H,H,H,H,H,H,H
C,C,O,C,H,H,H,H,H,H,H,H
C,C,O,C,H,H,H,H,H,H,H,H
C,C,N,C,O,H,H,H,H,H
C,C,N,C,O,H,H,H,H,H
C,C,N,C,N,O,H,H,H,H,H,H,H,H
C,C,N,C,N,O,H,H,H,H,H,H,H,H
C,C,C,C,C,O,H,H,H,H,H,H,H,H,H,H
C,C,C,C,C,O,H,H,H,H,H,H,H,H,H,H
N,C,C,C,C,H,H,H,H,H
N,C,C,C,C,H,H,H,H,H
N,C,N,C,O,H,H,H,H
N,C,N,C,O,H,H,H,H


In [24]:
# Take the reference molecule of dataset entry 1 and show its atom order.
mol=data.dataset_maybe_noisy[1].mol
print_atoms(mol)

N,H,H,H


In [25]:
# Renumber the atoms of `mol` using RDKit's canonical atom ranking and print
# the resulting atom order. Note that `mol` is rebound to the renumbered
# molecule, so re-running this cell renumbers the already renumbered molecule.
#
# NOTE(review): per the RDKit documentation, CanonicalRankAtoms returns the
# rank of each atom, whereas RenumberAtoms expects `newOrder[k]` to be the OLD
# index of the atom placed at new position k. Passing the ranks directly
# therefore applies the inverse of "sort atoms by rank". The printed order
# matches the noisy_mol order shown in the next cell's output, so this may be
# deliberate -- confirm before changing it.
order=Chem.CanonicalRankAtoms(mol)
mol = Chem.RenumberAtoms(mol, list(order))
print_atoms(mol)

H,N,H,H


In [26]:
# Show the atom order of the noisy molecule stored on the same dataset entry
# (index 1) for comparison with the orders printed above.
noisy_mol=data.dataset_maybe_noisy[1].noisy_mol
print_atoms(noisy_mol)

H,N,H,H


In [5]:
# Reference molecule of dataset entry 0 plus the ETKDGv3 parameter object used
# for conformer embedding further down.
mol = data.dataset_maybe_noisy[0].mol
ps = rdDistGeom.ETKDGv3()
# The iteration cap on EmbedParameters is called `maxIterations`. The earlier
# `ps.maxIters = 100` is not a documented EmbedParameters field, so it only
# attached a stray Python attribute and left the embedding settings unchanged.
ps.maxIterations = 100

In [None]:

# TODO: unfinished scratch cell. The dangling assignment `mol =` was a
# SyntaxError that stops "Restart & Run All"; fill in the intended expression
# or delete this cell.

In [6]:
# Coordinates of the conformer returned by GetConformer() with no explicit id;
# the output is one (x, y, z) row per atom.
mol.GetConformer().GetPositions()

array([[-1.26999998e-02,  1.08580005e+00,  8.00000038e-03],
       [ 2.19999999e-03, -6.00000005e-03,  2.00000009e-03],
       [ 1.01170003e+00,  1.46379995e+00,  3.00000014e-04],
       [-5.40799975e-01,  1.44749999e+00, -8.76600027e-01],
       [-5.23800015e-01,  1.43789995e+00,  9.06400025e-01]])

In [7]:
# Positions stored on dataset entry 0, shown for comparison with the conformer
# coordinates printed in the previous cell.
data.dataset_maybe_noisy[0].pos

tensor([[-1.2700e-02,  1.0858e+00,  8.0000e-03],
        [ 2.2000e-03, -6.0000e-03,  2.0000e-03],
        [ 1.0117e+00,  1.4638e+00,  3.0000e-04],
        [-5.4080e-01,  1.4475e+00, -8.7660e-01],
        [-5.2380e-01,  1.4379e+00,  9.0640e-01]])

In [12]:

# Embed 3D coordinates for `mol` with the ETKDG parameters in `ps`; the call's
# return value is kept in `generated`.
# NOTE(review): the return value is never checked -- per the RDKit docs a
# failed embedding is reported through it, so confirm it before relying on the
# coordinates.
generated = rdDistGeom.EmbedMolecule(mol, ps)

# Number of conformers now attached to the molecule.
mol.GetNumConformers()

1

In [13]:
# Build the LNNP module from the loaded arguments, without a prior model and
# with the data module's mean/std.
# NOTE(review): no checkpoint is loaded in the cells visible here -- confirm
# whether the predictions below are meant to come from trained weights.
model = LNNP(args, prior_model=None, mean=data.mean, std=data.std)

In [18]:
# Deep-copy one dataset entry to work on, and keep a second deep copy of it.
# NOTE(review): index 4654 is hard-coded, while the setup output above reports
# a 400/50/37 split -- confirm the index is valid for dataset_maybe_noisy.
sample=copy.deepcopy(data.dataset_maybe_noisy[4654])
copSample=copy.deepcopy(sample)

In [19]:
# Run the model on the sample's `z` and `pos` fields. The call returns three
# values; only the middle one is kept, as `noise`.
# NOTE(review): presumably `z` holds atomic numbers and the middle output is
# the predicted positional noise -- confirm against the model definition.
_,noise,_=model(sample.z,sample.pos)

In [20]:
# predpos: the sample's positions minus the model's `noise` output.
# gpos:    the sample's positions minus `sample.pos_target`
#          (presumably the noise-free geometry, if pos_target stores the
#          applied noise -- TODO confirm).
# initpos: the sample's positions themselves; this is the same object as
#          sample.pos, not a copy.
predpos=sample.pos-noise
gpos=sample.pos-sample.pos_target
initpos=sample.pos

In [21]:
def make_conformer(mol, positions):
    """Append a conformer built from ``positions`` to ``mol`` and return its id.

    Args:
        mol: RDKit molecule that receives the new conformer (modified in
            place).
        positions: One ``(x, y, z)`` row per atom, indexable as
            ``positions[i][k]``. Each coordinate is converted with ``float``,
            so tensor rows, NumPy rows and plain nested sequences all work.

    Returns:
        The id assigned to the newly added conformer.

    Raises:
        ValueError: If the number of rows in ``positions`` differs from
            ``mol.GetNumAtoms()``.
    """
    num_atoms = mol.GetNumAtoms()
    if len(positions) != num_atoms:
        raise ValueError(
            f"positions has {len(positions)} rows but the molecule has "
            f"{num_atoms} atoms"
        )

    conformer = Chem.Conformer(num_atoms)
    for atom_idx in range(num_atoms):
        x, y, z = (float(positions[atom_idx][axis]) for axis in range(3))
        conformer.SetAtomPosition(atom_idx, Point3D(x, y, z))

    # Let RDKit pick the id instead of using GetNumConformers(): the count can
    # coincide with an id that is already taken when existing ids are not
    # contiguous (e.g. after a conformer was removed).
    return mol.AddConformer(conformer, assignId=True)

In [22]:
# Attach the three coordinate sets to sample.mol as additional conformers, in
# this order: predicted, target-subtracted, initial. Re-running the cell adds
# three more conformers each time.
for coords in (predpos, gpos, initpos):
    make_conformer(sample.mol, coords)

In [23]:
# List the element symbol of every atom of sample.mol, one per line, in atom
# index order.
for atom in sample.mol.GetAtoms():
    print(atom.GetSymbol())

C
C
N
C
N
C
O
C
H
H
H
H
H
H
H
H


In [24]:
# List the element symbol of every atom of sample.noisy_mol, one per line, in
# atom index order, for comparison with sample.mol's ordering.
for atom in sample.noisy_mol.GetAtoms():
    print(atom.GetSymbol())

H
O
C
N
C
H
N
C
H
H
C
H
H
H
C
H


In [9]:
# Collect all conformers currently attached to sample.mol and print the id of
# each one.
conf = sample.mol.GetConformers()
for conformer in conf:
    print(conformer.GetId())

0
1
2
3


In [13]:
# Render conformer id 2 of sample.mol in 3D.
# NOTE(review): the id is hard-coded; which coordinate set it holds depends on
# the order in which conformers were added to the molecule -- confirm.
IPythonConsole.drawMol3D(sample.mol,confId=2)

In [24]:
# Overwrite the coordinates of one conformer of sample.mol with the predicted
# positions.
# The conformer is fetched explicitly here: the only visible definition of
# `conf` is the sequence returned by GetConformers(), which has no
# SetAtomPosition, so relying on it only works with leftover kernel state.
# NOTE(review): id 0 is assumed because the next cell draws confId=0 -- confirm.
conf0 = sample.mol.GetConformer(0)
for i in range(sample.mol.GetNumAtoms()):
    x, y, z = (predpos[i][axis].item() for axis in range(3))
    conf0.SetAtomPosition(i, Point3D(x, y, z))
    

In [25]:
# Render conformer id 0 of sample.mol in 3D.
IPythonConsole.drawMol3D(sample.mol,confId=0)

In [26]:
# Overwrite the coordinates of one conformer of sample.mol with the sample's
# own positions (sample.pos).
# The conformer is fetched explicitly here: the only visible definition of
# `conf` is the sequence returned by GetConformers(), which has no
# SetAtomPosition, so relying on it only works with leftover kernel state.
# NOTE(review): id 0 is assumed because the next cell draws confId=0 -- confirm.
conf0 = sample.mol.GetConformer(0)
for i in range(sample.mol.GetNumAtoms()):
    x, y, z = (sample.pos[i][axis].item() for axis in range(3))
    conf0.SetAtomPosition(i, Point3D(x, y, z))

In [42]:
# Render conformer 0 first, then run a UFF optimization (at most 300
# iterations) on that same conformer. The drawing is produced before the
# optimization call; the cell's value is the optimizer's return code.
IPythonConsole.drawMol3D(sample.mol, confId=0)
AllChem.UFFOptimizeMolecule(sample.mol, confId=0, maxIters=300)

0