In [1]:
from pathlib import Path
from rdkit import Chem
from tqdm.autonotebook import tqdm

  This is separate from the ipykernel package so we can avoid doing imports until


# Training data

In [2]:
# Count the docked molecules. The pattern mirrors the docked_{idx}.sdf names that
# the conversion loop opens, so unrelated files that merely end in "sdf" are not
# counted (they would make range(num_total) point at files that do not exist).
num_total = sum(1 for _ in Path('docked_jacs_full/').glob('docked_*.sdf'))
num_total

108528

In [6]:
# Create the output directory for the per-pose PDB files.
# exist_ok=True keeps the cell idempotent, so re-running it does not error.
Path('poses_jacs').mkdir(parents=True, exist_ok=True)

In [None]:
# Write every docked pose to its own PDB file: poses_jacs/m_idx_{idx}_pose_{i}.pdb
for idx in tqdm(range(num_total)):
    sdf_path = f'docked_jacs_full/docked_{idx}.sdf'
    suppl = Chem.SDMolSupplier(sdf_path)
    for i, m in enumerate(suppl):
        # SDMolSupplier yields None for records RDKit cannot parse. Fail with the
        # offending file and pose index instead of an opaque error from
        # MolToPDBFile; silently skipping would leave a gap in the pose numbering.
        if m is None:
            raise ValueError(f'RDKit could not parse pose {i} in {sdf_path}')
        Chem.MolToPDBFile(m, f'poses_jacs/m_idx_{idx}_pose_{i}.pdb')

  0%|          | 0/108528 [00:00<?, ?it/s]

In [None]:
# Number of poses per molecule listed in the .types file; pose files
# m_idx_{idx}_pose_0 .. m_idx_{idx}_pose_{n_poses - 1} are referenced for every molecule.
n_poses = 20

In [None]:
# Write the example list: one line per (molecule, pose) of the form
# "<molecule index> <receptor file> <pose file>".
with open("jacs.types", "w") as types_out:
    for mol_idx in tqdm(range(num_total)):
        types_out.writelines(
            f'{mol_idx} 5FL4_receptor.pdb poses_jacs/m_idx_{mol_idx}_pose_{pose}.pdb\n'
            for pose in range(n_poses)
        )

In [4]:
from gninatorch import setup, gnina, dataloaders
from argparse import Namespace

In [5]:
class VoxelLoader(dataloaders.GriddedExamplesLoader):
    """GriddedExamplesLoader with ``__len__`` defined as ``num_batches``.

    Having a length lets ``tqdm`` display a total when iterating over the loader.
    """
    def __len__(self):
        """Return the loader's ``num_batches`` attribute."""
        return self.num_batches

In [6]:
# Settings handed to gninatorch's example-provider and grid-maker setup helpers.
grid_args = Namespace(
    data_root='',
    # 20 examples per batch; with 20 consecutive, unshuffled pose lines per
    # molecule in the .types file this presumably makes one batch == one molecule.
    batch_size=20,
    ligmolcache="",
    recmolcache="",
    cache_structures=True,
    dimension=23.5,
    resolution=0.5,
    balanced=False,
    shuffle=False,  # keep the order of the .types file
    stratify_receptor=False,
    stratify_pos=False,
    iteration_scheme='small',
    stratify_max=0,
    stratify_min=0,
    stratify_step=0,
)

In [7]:
# Build the example provider (reads jacs.types) and the grid maker from grid_args.
provider = setup.setup_example_provider("jacs.types", grid_args, training=False)
grid_maker = setup.setup_grid_maker(grid_args)

# Loader over all examples: no random translation or rotation, grids created on the GPU.
all_loader = VoxelLoader(
    example_provider=provider, grid_maker=grid_maker,
    random_translation=0, random_rotation=False,
    device='cuda',
)

In [12]:
import torch

In [13]:
# Keep only the first element of setup_gnina_model's return value as the network
# (its repr, printed by the next cell, is DenseAffinity).
model = gnina.setup_gnina_model('dense')[0]

In [14]:
# Switch to evaluation mode and move the network to the GPU. Both calls return the
# module itself, so the cell still displays the model repr.
model.eval().to(torch.device('cuda'))

DenseAffinity(
  (features): Sequential(
    (data_enc_init_pool): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (data_enc_init_conv): Conv3d(28, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (data_enc_init_conv_relu): ReLU()
    (dense_block_0): DenseBlock(
      (blocks): Sequential(
        (data_enc_level0_batchnorm_conv0): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (data_enc_level0_conv0): Conv3d(32, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        (data_enc_level0_conv0_relu): ReLU()
        (data_enc_level0_batchnorm_conv1): BatchNorm3d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (data_enc_level0_conv1): Conv3d(48, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        (data_enc_level0_conv1_relu): ReLU()
        (data_enc_level0_batchnorm_conv2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_run

In [15]:
# Run the model's feature extractor on the first element of every batch
# (presumably the voxel grids; confirm against the loader).
# squeeze() drops the size-1 dimensions and unsqueeze(0) adds a leading axis, so
# each batch becomes one row of the final tensor (printed shape: [108528, 20, 224]).
with torch.inference_mode():
    feats = [
        model.features(batch[0]).squeeze().unsqueeze(0)
        for batch in tqdm(all_loader)
    ]
feats = torch.cat(feats)
feats.shape

  0%|          | 0/108528 [00:00<?, ?it/s]

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is 5FL4_receptor.pdb)



torch.Size([108528, 20, 224])

In [None]:
# Detach and move to CPU before saving, so the stored tensor is not tied to a CUDA device.
torch.save(feats.detach().cpu(), 'cnn_feats_JACS_full.pt')

In [19]:
# Display the layout of the extracted feature tensor.
feats.shape # [N_molecules, N_poses, feats_dim]

torch.Size([108528, 20, 224])

# Evaluation data

In [2]:
# Count the docked evaluation molecules. The pattern mirrors the docked_{idx}.sdf
# names that the conversion loop opens, so unrelated files that merely end in
# "sdf" are not counted (they would make range(num_total) overrun the real files).
num_total = sum(1 for _ in Path('ChEMBLeval_docking_results_clean/').glob('docked_*.sdf'))
num_total

3324

In [14]:
# Create the output directory for the per-pose PDB files of the evaluation set.
# exist_ok=True keeps the cell idempotent, so re-running it does not error.
Path('poses_hca_chembl').mkdir(parents=True, exist_ok=True)

In [15]:
# Write every docked pose to its own PDB file: poses_hca_chembl/m_idx_{idx}_pose_{i}.pdb
for idx in tqdm(range(num_total)):
    sdf_path = f'ChEMBLeval_docking_results_clean/docked_{idx}.sdf'
    suppl = Chem.SDMolSupplier(sdf_path)
    for i, m in enumerate(suppl):
        # SDMolSupplier yields None for records RDKit cannot parse. Fail with the
        # offending file and pose index instead of an opaque error from
        # MolToPDBFile; silently skipping would leave a gap in the pose numbering.
        if m is None:
            raise ValueError(f'RDKit could not parse pose {i} in {sdf_path}')
        Chem.MolToPDBFile(m, f'poses_hca_chembl/m_idx_{idx}_pose_{i}.pdb')

  0%|          | 0/3324 [00:00<?, ?it/s]

In [3]:
# Number of poses per molecule listed in the .types file; pose files
# m_idx_{idx}_pose_0 .. m_idx_{idx}_pose_{n_poses - 1} are referenced for every molecule.
n_poses = 20

In [18]:
# Write the example list: one line per (molecule, pose) of the form
# "<molecule index> <receptor file> <pose file>".
with open("caix_chembl.types", "w") as types_out:
    for mol_idx in tqdm(range(num_total)):
        types_out.writelines(
            f'{mol_idx} 5FL4_receptor.pdb poses_hca_chembl/m_idx_{mol_idx}_pose_{pose}.pdb\n'
            for pose in range(n_poses)
        )

  0%|          | 0/3324 [00:00<?, ?it/s]

In [4]:
from gninatorch import setup, gnina, dataloaders
from argparse import Namespace

In [5]:
# NOTE(review): this class is declared twice in the notebook with identical bodies;
# the later definition silently replaces the earlier one when both cells run.
class VoxelLoader(dataloaders.GriddedExamplesLoader):
    """GriddedExamplesLoader with ``__len__`` defined as ``num_batches``.

    Having a length lets ``tqdm`` display a total when iterating over the loader.
    """
    def __len__(self):
        """Return the loader's ``num_batches`` attribute."""
        return self.num_batches

In [6]:
# Settings handed to gninatorch's example-provider and grid-maker setup helpers.
grid_args = Namespace(
    data_root='',
    # 20 examples per batch; with 20 consecutive, unshuffled pose lines per
    # molecule in the .types file this presumably makes one batch == one molecule.
    batch_size=20,
    ligmolcache="",
    recmolcache="",
    cache_structures=True,
    dimension=23.5,
    resolution=0.5,
    balanced=False,
    shuffle=False,  # keep the order of the .types file
    stratify_receptor=False,
    stratify_pos=False,
    iteration_scheme='small',
    stratify_max=0,
    stratify_min=0,
    stratify_step=0,
)

In [7]:
# Build the example provider (reads caix_chembl.types) and the grid maker from grid_args.
provider = setup.setup_example_provider("caix_chembl.types", grid_args, training=False)
grid_maker = setup.setup_grid_maker(grid_args)

# Loader over all examples: no random translation or rotation, grids created on the GPU.
all_loader = VoxelLoader(
    example_provider=provider, grid_maker=grid_maker,
    random_translation=0, random_rotation=False,
    device='cuda',
)

In [8]:
import torch

In [9]:
# Keep only the first element of setup_gnina_model's return value as the network
# (its repr, printed by the next cell, is DenseAffinity).
model = gnina.setup_gnina_model('dense')[0]

In [10]:
# Switch to evaluation mode and move the network to the GPU. Both calls return the
# module itself, so the cell still displays the model repr.
model.eval().to(torch.device('cuda'))

DenseAffinity(
  (features): Sequential(
    (data_enc_init_pool): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (data_enc_init_conv): Conv3d(28, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (data_enc_init_conv_relu): ReLU()
    (dense_block_0): DenseBlock(
      (blocks): Sequential(
        (data_enc_level0_batchnorm_conv0): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (data_enc_level0_conv0): Conv3d(32, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        (data_enc_level0_conv0_relu): ReLU()
        (data_enc_level0_batchnorm_conv1): BatchNorm3d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (data_enc_level0_conv1): Conv3d(48, 16, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
        (data_enc_level0_conv1_relu): ReLU()
        (data_enc_level0_batchnorm_conv2): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_run

In [11]:
# Run the model's feature extractor on the first element of every batch
# (presumably the voxel grids; confirm against the loader).
# squeeze() drops the size-1 dimensions and unsqueeze(0) adds a leading axis, so
# each batch becomes one row of the final tensor (printed shape: [3324, 20, 224]).
with torch.inference_mode():
    feats = [
        model.features(batch[0]).squeeze().unsqueeze(0)
        for batch in tqdm(all_loader)
    ]
feats = torch.cat(feats)
feats.shape

  0%|          | 0/3324 [00:00<?, ?it/s]

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is 5FL4_receptor.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m_idx_1029_pose_0.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m_idx_1029_pose_1.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m_idx_1029_pose_2.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m_idx_1029_pose_3.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m_idx_1029_pose_4.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m_idx_1029_pose_5.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m_idx_1029_pose_6.pdb)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is poses_hca_chembl/m

torch.Size([3324, 20, 224])

In [12]:
# Detach and move to CPU before saving, so the stored tensor is not tied to a CUDA device.
torch.save(feats.detach().cpu(), 'cnn_feats_hca_ChEMBL.pt')