In [4]:
import os
os.environ['DGLBACKEND'] = 'pytorch'
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

import dgl.function as fn
import dgl
from dgllife.data import Tox21
from dgllife.utils import SMILESToBigraph, CanonicalAtomFeaturizer, CanonicalBondFeaturizer, RandomSplitter
from torch.nn import BCEWithLogitsLoss
from torch.optim import Adam
from dgl.data.utils import split_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import roc_auc_score as rac
import torch.optim as optim
from tqdm.notebook import tqdm,trange
from scipy import signal

In [48]:
import cupy

In [5]:
smiles_to_g = SMILESToBigraph(node_featurizer=CanonicalAtomFeaturizer(), edge_featurizer=CanonicalBondFeaturizer())

In [6]:
dataset = Tox21(smiles_to_g)

Downloading C:\Users\dhrub\.dgl/tox21.csv.gz from https://data.dgl.ai/dataset/tox21.csv.gz...
Processing dgl graphs from scratch...
Processing molecule 1000/7831
Processing molecule 2000/7831
Processing molecule 3000/7831
Processing molecule 4000/7831
Processing molecule 5000/7831
Processing molecule 6000/7831
Processing molecule 7000/7831


In [7]:
dataset[0]

('CCOc1ccc2nc(S(N)(=O)=O)sc2c1',
 Graph(num_nodes=16, num_edges=34,
       ndata_schemes={'h': Scheme(shape=(74,), dtype=torch.float32)}
       edata_schemes={'e': Scheme(shape=(12,), dtype=torch.float32)}),
 tensor([0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 tensor([1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1.]))

In [8]:
#Batching a list of datapoints for dataloader.
def collate_molgraphs(data):
    smiles, graphs, labels, masks = map(list, zip(*data))

    g = dgl.batch(graphs)
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)
    labels = torch.stack(labels, dim=0)
    masks = torch.stack(masks, dim=0)
    return smiles, g, labels, masks

In [9]:
train_set, val_set, test_set = split_dataset(dataset, shuffle=True)
train_loader = DataLoader(train_set, batch_size=128, shuffle=True, collate_fn=collate_molgraphs)
val_loader = DataLoader(val_set, batch_size=128, shuffle=True, collate_fn=collate_molgraphs)
test_loader = DataLoader(test_set, batch_size=128, shuffle=True, collate_fn=collate_molgraphs)

In [26]:
device = "cuda" if torch.cuda.is_available() else "cpu"

In [27]:
device

'cuda'

In [30]:
class Meter(object):
    """Track and summarize model performance on a dataset for
    (multi-label) binary classification."""

    def __init__(self):
        self.mask = []
        self.y_pred = []
        self.y_true = []

    def update(self, y_pred, y_true, mask):
        """Update for the result of an iteration
        Parameters
        ----------
        y_pred : float32 tensor
            Predicted molecule labels with shape (B, T),
            B for batch size and T for the number of tasks
        y_true : float32 tensor
            Ground truth molecule labels with shape (B, T)
        mask : float32 tensor
            Mask for indicating the existence of ground
            truth labels with shape (B, T)
        """
        self.y_pred.append(y_pred.detach().cpu())
        self.y_true.append(y_true.detach().cpu())
        self.mask.append(mask.detach().cpu())

    def roc_auc_score(self):
        """Compute roc-auc score for each task.
        Returns
        -------
        list of float
            roc-auc score for all tasks
        """
        mask = torch.cat(self.mask, dim=0)
        y_pred = torch.cat(self.y_pred, dim=0)
        y_true = torch.cat(self.y_true, dim=0)
        # This assumes binary case only
        y_pred = torch.sigmoid(y_pred)
        n_tasks = y_true.shape[1]
        scores = []
        for task in range(n_tasks):
            task_w = mask[:, task]
            task_y_true = y_true[:, task][task_w != 0].numpy()
            task_y_pred = y_pred[:, task][task_w != 0].numpy()
            scores.append(rac(task_y_true, task_y_pred))
        return scores

In [31]:
def run_an_eval_epoch(model, data_loader):
    model.eval()
    eval_meter = Meter()
    with torch.no_grad():
        for batch_id, batch_data in enumerate(data_loader):
            smiles, g, labels, masks = batch_data
            atom_feats = g.ndata.pop('h')
            bond_feats = g.edata.pop('e')
            atom_feats, bond_feats, labels = (atom_feats.to(device), bond_feats.to(device), labels.to(device))
            logits = model(g, atom_feats, bond_feats)
            eval_meter.update(logits, labels, masks)
    return np.mean(eval_meter.roc_auc_score())

In [40]:
train_ids = torch.arange(1000)
def run_a_train_epoch(train_loader, val_loader, model):
    loss_criterion = BCEWithLogitsLoss(pos_weight=torch.tensor(dataset.task_pos_weights(train_ids)).to(device), reduction="none")
    optimizer = Adam(model.parameters(), lr=1e-3)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    
    for epoch in range(2):
        model.train()
        train_meter = Meter()
        total_loss = 0
        for batch_id, batch_data in enumerate(train_loader):
            smiles, g, labels, masks = batch_data
            atom_feats = g.ndata.pop('h')
            bond_feats = g.edata.pop('e')
            atom_feats, bond_feats, labels, masks, g=(
                atom_feats.to(device),
                bond_feats.to(device),
                labels.to(device),
                masks.to(device),
                g.to(device)
            )
            logits = model(g, atom_feats, bond_feats)
            # Mask non-existing labels
            loss = (loss_criterion(logits, labels) * (masks != 0).float()).mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()
        train_acc = run_an_eval_epoch(model, train_loader)
        valid_acc = run_an_eval_epoch(model, val_loader)
        print(
            "Epoch {:05d} | Loss {:.4f} | Train Acc. {:.4f} | Validation Acc. {:.4f} ".format(
                epoch, total_loss / 10, train_acc, valid_acc
            )
        )
        train_meter.update(logits, labels, masks)
    train_score = np.mean(train_meter.roc_auc_score())
    print(
        "epoch {:d}/{:d}, training roc-auc {:.4f}".format(epoch + 1, 10 , train_score)
    )

class GATLayer(nn.Module):
    def __init__(self, in_feat, out_feat, e_feat):
        super(GATLayer, self).__init__()
        #self.g = g
        # equation (1)
        self.fc = nn.Linear(in_feat, out_feat, bias=False)
        # equation (2)
        self.attn_fc = nn.Linear(2 * out_feat, 1, bias=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        gain = nn.init.calculate_gain('relu')
        nn.init.xavier_normal_(self.fc.weight, gain=gain)
        nn.init.xavier_normal_(self.attn_fc.weight, gain=gain)

    def edge_attention(self, edges):
        # edge UDF for equation (2)
        z2 = torch.cat([edges.src['z'], edges.dst['z']], dim=1)
        a = self.attn_fc(z2)
        return {'e': F.leaky_relu(a)}

    def message_func(self, edges):
        # message UDF for equation (3) & (4)
        return {'z': edges.src['z'], 'e': edges.data['e']}

    def reduce_func(self, nodes):
        # reduce UDF for equation (3) & (4)
        # equation (3)
        alpha = F.softmax(nodes.mailbox['e'], dim=1)
        # equation (4)
        h = torch.sum(alpha * nodes.mailbox['z'], dim=1)
        return {'h': h}

    def forward(self, g, h, e):
        # equation (1)
        #self.g = g
        z = self.fc(h)
        self.g.ndata['z'] = z
        # equation (2)
        self.g.apply_edges(self.edge_attention)
        # equation (3) & (4)
        self.g.update_all(self.message_func, self.reduce_func)
        return self.g.ndata.pop('h')

In [92]:
class GATLayer1(nn.Module):
    def __init__(self, in_feat, out_feat, e_feat):
        super(GATLayer1, self).__init__()
        #self.g = g
        # equation (1)
        self.fc = nn.Linear(in_feat+out_feat, out_feat, bias=False)
        self.fc1 = nn.Linear(e_feat, out_feat, bias = False)
        self.fc2 = nn.Linear(in_feat, out_feat, bias=False)
        # equation (2)
        self.attn_fc = nn.Linear(2 * out_feat, 1, bias=False)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        gain = nn.init.calculate_gain('relu')
        nn.init.xavier_normal_(self.fc.weight, gain=gain)
        nn.init.xavier_normal_(self.attn_fc.weight, gain=gain)

    def edge_attention(self, edges):
        # edge UDF for equation (2)
        z2 = torch.cat([edges.src['z'], edges.dst['z']], dim=1)
        a = self.attn_fc(z2)
        return {'e': F.leaky_relu(a)}
    
    def edge_attention1(self, edges):
    
        # edge UDF for equation (2)
        catfeat = torch.cat([edges.src['z'], edges.dst['z']], dim=1)
        node_embedding=[]
        for i in range(0, len(catfeat)):
            u = edges.data['y'][i]
            v = catfeat[i]
            #res = torch.from_numpy(signal.fftconvolve(((v.cuda()).detach().cpu().clone().numpy()), (u.cuda().detach().cpu().clone().numpy()), mode='same'))
            res = torch.as_tensor(signal.fftconvolve(cupy.asarray(v), cupy.asarray(u), mode='same'))
            node_embedding.append(res)
        probs = torch.stack([node_embedding[i].clone().detach().requires_grad_(True) for i in range(0,len(node_embedding))])
        a = self.attn_fc(probs)
        return {'f': F.leaky_relu(a)}

    def message_func(self, edges):
        # message UDF for equation (3) & (4)
        return {'z': edges.src['z'], 'f': edges.data['f']}

    def reduce_func(self, nodes):
        # reduce UDF for equation (3) & (4)
        # equation (3)
        alpha = F.softmax(nodes.mailbox['f'], dim=1)
        #alpha = nodes.mailbox['f']
        # equation (4)
        k = torch.sum(alpha * nodes.mailbox['z'], dim=1)
        return {'k': k}

    def forward(self, g, h, e):
        # equation (1)
        #self.g = g
        with g.local_scope():
            g.ndata["h"] = h
            g.edata["e"] = e
            #print(h.size())
            z = self.fc2(h)
            y = self.fc1(e)
            #print(z.size())

            g.ndata['z'] = z
            g.edata['y'] = y
        # equation (2)
            g.apply_edges(self.edge_attention1)
        # equation (3) & (4)
            g.update_all(self.message_func, self.reduce_func)
            h_N = g.ndata["k"]
            #print(h_N.size())
            #print(h.size())
            h_total = torch.cat([h, h_N], dim=1)
            h_new = self.fc(h_total)
            return [h_new, y]

In [58]:
class MultiHeadGATLayer(nn.Module):
    def __init__(self, in_feats, h_feats, e_feats, num_classes, num_heads, merge='cat'):
        super(MultiHeadGATLayer, self).__init__()
        self.heads = nn.ModuleList()
        for i in range(num_heads):
            self.heads.append(GATLayer1(in_feats, h_feats, e_feats))
        self.merge = merge

    def forward(self, g, in_feat, e_feat):
        head_outs_node = [attn_head(g, in_feat, e_feat)[0] for attn_head in self.heads]
        head_outs_edge = [attn_head(g, in_feat, e_feat)[1] for attn_head in self.heads]
        if self.merge == 'cat':
            # concat on the output feature dimension (dim=1)
            #print(type(head_outs_node))
            #print(type(head_outs_edge))
            return torch.cat(head_outs_node, dim=1), torch.cat(head_outs_edge, dim=1)
        else:
            # merge using average
            return torch.mean(torch.stack(head_outs))

class GAT(nn.Module):
    def __init__(self, in_feats, h_feats, e_feats, num_classes):
        super(GAT, self).__init__()
        self.layer1 = GATLayer1(in_feats, h_feats, e_feats)
        # Be aware that the input dimension is hidden_dim*num_heads since
        # multiple head outputs are concatenated together. Also, only
        # one attention head in the output layer.
        self.layer2 = GATLayer1(h_feats, num_classes, e_feats)

    def forward(self, g, in_feat, e_feat):
        h = self.layer1(g, in_feat, e_feat)
        #print(h.size())
        h = F.elu(h)
        h = self.layer2(g, h, e_feat)
        g.ndata["h"] = h
        return dgl.max_nodes(g, "h")

In [57]:
class GAT1(nn.Module):
    def __init__(self, in_feats, h_feats, e_feats, num_classes, num_heads):
        super(GAT1, self).__init__()
        self.layer1 = MultiHeadGATLayer(in_feats, h_feats, e_feats,__, num_heads)
        # Be aware that the input dimension is hidden_dim*num_heads since
        # multiple head outputs are concatenated together. Also, only
        # one attention head in the output layer.
        #self.layer2 = MultiHeadGATLayer(h_feats*num_heads, num_classes, e_feats,__,1)
        self.layer2 = MultiHeadGATLayer(h_feats*num_heads, num_classes, h_feats*num_heads,__,1)

    def forward(self, g, in_feat, e_feat):
        #print(self.layer1(g, in_feat, e_feat)[1])
        h, e = self.layer1(g, in_feat, e_feat)
        #print(h.size())
        h = F.elu(h)
        e = F.elu(e)
        h, e = self.layer2(g, h, e)
        g.ndata["h"] = h
        return dgl.max_nodes(g, "h")

In [93]:
model = GAT1(
            in_feats = 74,
            h_feats = 60,
            e_feats = 12,
            num_classes = 12,
            num_heads=2).to(device)

In [61]:
print(model)

GAT1(
  (layer1): MultiHeadGATLayer(
    (heads): ModuleList(
      (0-1): 2 x GATLayer1(
        (fc): Linear(in_features=134, out_features=60, bias=False)
        (fc1): Linear(in_features=12, out_features=60, bias=False)
        (fc2): Linear(in_features=74, out_features=60, bias=False)
        (attn_fc): Linear(in_features=120, out_features=1, bias=False)
      )
    )
  )
  (layer2): MultiHeadGATLayer(
    (heads): ModuleList(
      (0): GATLayer1(
        (fc): Linear(in_features=132, out_features=12, bias=False)
        (fc1): Linear(in_features=120, out_features=12, bias=False)
        (fc2): Linear(in_features=120, out_features=12, bias=False)
        (attn_fc): Linear(in_features=24, out_features=1, bias=False)
      )
    )
  )
)


In [94]:
print("Training...")
run_a_train_epoch(train_loader, val_loader, model)

Training...


  loss_criterion = BCEWithLogitsLoss(pos_weight=torch.tensor(dataset.task_pos_weights(train_ids)).to(device), reduction="none")


TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [60]:
test_score = run_an_eval_epoch(model, test_loader)
print("Test score {:.4f}".format(test_score))

DGLError: Cannot assign node feature "h" on device cuda:0 to a graph on device cpu. Call DGLGraph.to() to copy the graph to the same device.

In [22]:
pip install ipywidgets

Note: you may need to restart the kernel to use updated packages.
