In [1]:
from typing import Optional

import os
import warnings
import os.path as osp
from math import pi as PI

import torch
import torch.nn.functional as F
from torch.nn import Embedding, Sequential, Linear, ModuleList
import numpy as np

from torch_scatter import scatter
from torch_geometric.data.makedirs import makedirs
from torch_geometric.data import download_url, extract_zip, Dataset
from torch_geometric.nn import radius_graph, MessagePassing

def atomic_forward(self, z, pos, batch=None):
    """Run the network on a set of atoms and aggregate the result per graph.

    Args:
        z: 1-D ``torch.long`` tensor with one entry per atom; it is fed to
            ``self.embedding`` and used to index ``self.atomic_mass`` and
            ``self.atomref``.  (Presumably atomic numbers -- confirm.)
        pos: Per-atom positions; row ``i`` belongs to ``z[i]``.
        batch: Optional index tensor assigning each atom to a graph.  When
            omitted, every atom is assigned to graph ``0``.

    Returns:
        The per-atom values reduced over ``batch`` with ``self.readout``.
        In dipole mode the reduced vector is replaced by its norm (last
        dimension kept); the result is multiplied by ``self.scale`` when
        that attribute is set.

    Raises:
        AssertionError: if ``z`` is not a 1-D ``torch.long`` tensor.
    """
    assert z.dim() == 1 and z.dtype == torch.long
    if batch is None:
        # Single-graph input: all atoms share index 0.
        batch = torch.zeros_like(z)

    node_feats = self.embedding(z)

    # Neighbour graph within the cutoff radius, built separately per graph.
    edge_index = radius_graph(pos, r=self.cutoff, batch=batch,
                              max_num_neighbors=self.max_num_neighbors)
    src, dst = edge_index
    offsets = pos[src] - pos[dst]
    distances = offsets.norm(dim=-1)
    expanded = self.distance_expansion(distances)

    # Residual updates: each interaction's output is added to the features.
    for block in self.interactions:
        update = block(node_feats, edge_index, distances, expanded)
        node_feats = node_feats + update

    node_feats = self.lin2(self.act(self.lin1(node_feats)))

    if self.dipole:
        # Weight each atom's value by its offset from the centre of mass.
        mass = self.atomic_mass[z].view(-1, 1)
        weighted_pos = scatter(mass * pos, batch, dim=0)
        total_mass = scatter(mass, batch, dim=0)
        centre = weighted_pos / total_mass
        node_feats = node_feats * (pos - centre.index_select(0, batch))
    else:
        # Rescaling and the per-atom reference term only apply outside
        # dipole mode.
        if self.mean is not None and self.std is not None:
            node_feats = node_feats * self.std + self.mean
        if self.atomref is not None:
            node_feats = node_feats + self.atomref(z)

    out = scatter(node_feats, batch, dim=0, reduce=self.readout)

    if self.dipole:
        out = torch.norm(out, dim=-1, keepdim=True)

    if self.scale is not None:
        out = self.scale * out

    return out

  return torch._C._cuda_getDeviceCount() > 0


In [2]:
# Root data directory; later cells read its 'processed' and 'scaffold_splits'
# subdirectories, so this cell must be run before them.
data_dir = 'data/'

In [4]:
# Run once to preprocess the datasets and generate the chunk files (uncomment one of the lines below).
# dataset = ConfEnsembleDataset()
# dataset = ConfEnsembleDataset(dataset='platinum') # 16G

In [7]:
# Collect the entries of the processed-data directory whose names start with
# 'pdbbind', and record how many there are.
processed_dir = osp.join(data_dir, 'processed')
pdbbind_chunks = [
    entry
    for entry in os.listdir(processed_dir)
    if entry.startswith('pdbbind')
]
pdbbind_n_chunks = len(pdbbind_chunks)

In [2]:
# Build the train / val / test datasets for one scaffold split.
#
# This cell relies on `data_dir` and `pdbbind_n_chunks` being defined by the
# cells above; running it on a fresh kernel without them raises NameError.
# NOTE(review): `tqdm`, `ConfEnsembleDataset` and `ConcatDataset` are not
# imported in the visible import cell -- confirm they are in scope.
iteration = 0


def read_split_smiles(subset, split_index, root_dir):
    """Read the SMILES list of one subset of a scaffold split.

    Parameters
    ----------
    subset : str
        Filename prefix of the subset ('train', 'val' or 'test').
    split_index : int
        Index of the scaffold split, used as the filename suffix.
    root_dir : str
        Directory containing the 'scaffold_splits' folder.

    Returns
    -------
    list of str
        One whitespace-stripped entry per line of the file, in file order.
    """
    path = os.path.join(root_dir, 'scaffold_splits',
                        f'{subset}_smiles_scaffold_split_{split_index}.txt')
    with open(path, 'r') as f:
        return [line.strip() for line in f]


train_smiles = read_split_smiles('train', iteration, data_dir)
val_smiles = read_split_smiles('val', iteration, data_dir)
test_smiles = read_split_smiles('test', iteration, data_dir)

train_datasets = []
val_datasets = []
test_datasets = []

# One ConfEnsembleDataset per (chunk, subset) pair, created in the order
# train -> val -> test for every chunk.
subsets = (
    (train_datasets, train_smiles),
    (val_datasets, val_smiles),
    (test_datasets, test_smiles),
)
for chunk_number in tqdm(range(pdbbind_n_chunks)):
    for subset_datasets, subset_smiles in subsets:
        subset_datasets.append(
            ConfEnsembleDataset(loaded_chunk=chunk_number,
                                smiles_list=subset_smiles)
        )

train_dataset = ConcatDataset(train_datasets)
val_dataset = ConcatDataset(val_datasets)
test_dataset = ConcatDataset(test_datasets)

NameError: name 'data_dir' is not defined