In [1]:
import torch
from torch_geometric.datasets import MoleculeNet

dataset = MoleculeNet(root='../../Data/MoleculeNet', name='BACE')

In [2]:
import pandas as pd

smiles_list = pd.read_csv('../../Data/MoleculeNet/bace/raw/bace.csv')['mol'].tolist()
smiles_list

['O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2ccccc2C)C)CC1(C)C',
 'Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(=O)C)(CC(C)C)C1=O)CCc1ccccc1)[C@H](O)[C@@H]1[NH2+]C[C@H](OCCC)C1',
 'S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(OC)ccc1)Cc1ccccc1)C',
 'S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c(N)c(F)c2)[C@H](O)[C@@H]([NH2+]Cc2cc(ccc2)C(C)(C)C)C1',
 'S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(ccc1)C(F)(F)F)Cc1ccccc1)C',
 'S1(=O)C[C@@H](Cc2cc(OC(C(F)(F)F)C(F)(F)F)c(N)c(F)c2)[C@H](O)[C@@H]([NH2+]Cc2cc(ccc2)C(C)(C)C)C1',
 'S(=O)(=O)(CCCCC)C[C@@H](NC(=O)c1cccnc1)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(ccc1)CC)Cc1cc(F)cc(F)c1',
 'Fc1c2c(ccc1)[C@@]([NH+]=C2N)(C=1C=C(C)C(=O)N(C=1)CC)c1cc(ccc1)-c1cc(cnc1)C#CC',
 'O1c2c(cc(cc2)CC)[C@@H]([NH2+]C[C@@H](O)[C@H]2NC(=O)C=3C=CC(=O)N(CCCCc4cc(C2)ccc4)C=3)CC12CCC2',
 'O=C1N(CCCC1)C(C)(C)[C@@H]1C[C@@H](CCC1)C(=O)N[C@H]([C@H](O)C[NH2+]Cc1cc(ccc1)C(C)C)Cc1ccccc1',
 'Fc1cc(cc(F)c1)C[C@H](NC(=O)c1cc(cc(c1)C)C(=O)N(

In [4]:
import sys
sys.path.append('../../')
sys.path.append('../../Libs')
from Libs.splitting import scaffold_split

In [5]:
train_dataset, valid_dataset, test_dataset = scaffold_split(dataset, smiles_list, task_idx=None, null_value=0, frac_train=0.8,frac_valid=0.1, frac_test=0.1)

In [12]:
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)

In [9]:
import torch
from torch.nn import Linear, Parameter, Sequential, BatchNorm1d, ReLU, Dropout
from torch_geometric.nn import MessagePassing, GCNConv, GATv2Conv, GINConv, GINEConv, global_mean_pool
from torch_geometric.utils import add_self_loops, remove_self_loops, degree, softmax

sys.path.append('./')
from Libs.layers import *
from Libs.common import *

In [8]:
class EINModel_v3(torch.nn.Module):
    def __init__(self, input_dim, dim_h, final_dim, num_heads, edge_dim, **kwargs):
        super().__init__()
        torch.manual_seed(42)

        # Layers
        self.conv1 = EINv3(input_dim, 
                           dim_h,
                           edge_dim=edge_dim, 
                           heads=num_heads, 
                           **kwargs)
        
        self.conv2 = EINv3(dim_h * num_heads, 
                               dim_h,
                               edge_dim=edge_dim, 
                               heads=num_heads, 
                               **kwargs)
        
        self.conv3 = EINv3(dim_h * num_heads, 
                               dim_h, 
                               edge_dim=edge_dim, 
                               heads=num_heads, 
                               concat=False, 
                               **kwargs)

        # Linear layer
        self.lin1 = Linear(dim_h * 3, dim_h * 3)

        # Classification head
        self.lin2 = Linear(dim_h * 3, final_dim)

    def forward(self, x, edge_index, edge_attr, batch):
        # Embedding
        h1 = self.conv1(x, edge_index, edge_attr)
        h1 = h1.relu()
        h2 = self.conv2(h1, edge_index, edge_attr)
        h2 = h2.relu()
        h3 = self.conv3(h2, edge_index, edge_attr)
        h3 = h3.relu()

        C = h3.shape[-1]  # dim_h
        H = h2.shape[-1] // C  # num_heads

        # Graph-level readout
        h1 = global_mean_pool(h1.view(-1, H, C).mean(dim=1), batch)
        h2 = global_mean_pool(h2.view(-1, H, C).mean(dim=1), batch)
        h3 = global_mean_pool(h3, batch)

        h = torch.cat((h1, h2, h3), dim=1)

        # Classifier
        h = self.lin1(h)
        h = h.relu()
        h = F.dropout(h, p=0.5, training=self.training)
        h = self.lin2(h)

        # return F.log_softmax(h, dim=1)
        return h.flatten()


In [19]:
for batch in train_loader:
    sample_batch = batch
    print(batch)
    break

DataBatch(x=[947, 9], edge_index=[2, 2022], edge_attr=[2022, 3], smiles=[32], y=[32, 1], batch=[947], ptr=[33])


In [29]:
layer = EINv3(9, 64, 16, edge_dim=3, concat=False)
layer

EINv3(9, 64, heads=16)

In [30]:
layer(sample_batch.x.float(), sample_batch.edge_index, sample_batch.edge_attr.float()).shape

torch.Size([947, 64])

In [22]:
res = ResidualBlockGNN(EINv3)
res.forward(sample_batch.x, 9, 64, edge_dim=3).shape

TypeError: empty(): argument 'size' must be tuple of ints, but found element of type Tensor at pos 2

In [None]:
class LayerWrapper(nn.Module):
    
    def __init__(self, input_dim, dim_h, edge_dim, num_heads, act_fn, **kwargs):
        super().__init__()

        self.net = nn.Sequential(
                        EINv3(in_channels=input_dim, 
                              out_channels=dim_h,
                              edge_dim=edge_dim, 
                              heads=num_heads, 
                              **kwargs),
                        act_fn()
        )

    def forward(self, x, edge_index, edge_attr, batch):
        out = self.net(x, edge_index, edge_attr, batch)
        return out + x # out and x have to same dimension!

In [None]:
class DenseBlock(nn.Module):
    
    def __init__(self, input_dim, dim_h, edge_dim, num_heads, num_layers, **kwargs):
        super().__init__()

        layers = []
        for layer_idx in range(num_layers):
            layers.append(
                EINv3(in_channels=input_dim, 
                      out_channels=dim_h,
                     edge_dim=edge_dim, 
                     heads=num_heads, 
                    **kwargs)
            )

        self.block = nn.Sequential(*layers)

    def forward(self, x, edge_index, edge_attr, batch):
        out = self.block(x)