deepchem · arunppsg · Mar 15, 2023 · Jan 17, 2023 · Mar 10, 2023 · Mar 10, 2023
diff --git a/deepchem/models/__init__.py b/deepchem/models/__init__.py
@@ -49,6 +49,7 @@
     from deepchem.models.torch_models import MEGNetModel
     from deepchem.models.torch_models import CNN
     from deepchem.models.torch_models import ScaledDotProductAttention, SelfAttention
+    from deepchem.models.torch_models import GroverReadout
 except ModuleNotFoundError as e:
     logger.warning(
         f'Skipped loading some PyTorch models, missing a dependency. {e}')

diff --git a/deepchem/models/torch_models/__init__.py b/deepchem/models/torch_models/__init__.py
@@ -19,6 +19,7 @@
 from deepchem.models.torch_models.layers import MultilayerPerceptron, CNNModule, CombineMeanStd, WeightedLinearCombo, AtomicConvolution, NeighborList
 from deepchem.models.torch_models.cnn import CNN
 from deepchem.models.torch_models.attention import ScaledDotProductAttention, SelfAttention
+from deepchem.models.torch_models.readout import GroverReadout
 try:
     from deepchem.models.torch_models.dmpnn import DMPNN, DMPNNModel
 except ModuleNotFoundError as e:

diff --git a/deepchem/models/torch_models/readout.py b/deepchem/models/torch_models/readout.py
@@ -0,0 +1,100 @@
+from typing import List
+try:
+    import torch
+    import torch.nn as nn
+except ModuleNotFoundError:
+    raise ImportError('The module requires PyTorch to be installed')
+
+from deepchem.models.torch_models.attention import SelfAttention
+
+
+class GroverReadout(nn.Module):
+    """Performs readout on a batch of graphs
+
+    The readout module is used for performing readouts on batched graphs to
+    convert node embeddings/edge embeddings into graph embeddings. It is used
+    in the Grover architecture to generate a graph embedding from node and edge
+    embeddings. The generated embedding can be used in downstream tasks like
+    graph classification or graph prediction problems.
+
+    Parameters
+    ----------
+    rtype: str
+        Readout type, can be 'mean' or 'self_attention'
+    in_features: int
+        Size of each input node/edge embedding
+    attn_hidden_size: int
+        If readout type is attention, size of hidden layer in attention network.
+    attn_out_size: int
+        If readout type is attention, size of attention out layer.
+
+    Example
+    -------
+    >>> import torch
+    >>> from deepchem.models.torch_models.readout import GroverReadout
+    >>> n_nodes, n_features = 6, 32
+    >>> readout = GroverReadout(rtype="mean", in_features=n_features)
+    >>> embedding = torch.ones(n_nodes, n_features)
+    >>> result = readout(embedding, scope=[(0, 6)])
+    >>> result.size()
+    torch.Size([1, 32])
+    """
+
+    def __init__(self,
+                 rtype: str = 'mean',
+                 in_features: int = 128,
+                 attn_hidden_size: int = 32,
+                 attn_out_size: int = 32):
+        super(GroverReadout, self).__init__()
+        # Reject unknown readout types up front; forward() only knows these two.
+        if rtype not in ("mean", "self_attention"):
+            raise ValueError(
+                f"rtype must be 'mean' or 'self_attention', got {rtype!r}")
+        self.cached_zero_vector = nn.Parameter(torch.zeros(in_features),
+                                               requires_grad=False)
+        self.rtype = rtype
+        if rtype == "self_attention":
+            self.attn = SelfAttention(hidden_size=attn_hidden_size,
+                                      in_features=in_features,
+                                      out_features=attn_out_size)
+
+    def forward(self, graph_embeddings: torch.Tensor,
+                scope: List[List]) -> torch.Tensor:
+        """Pool node/edge embeddings into one embedding per scope entry.
+
+        Parameters
+        ----------
+        graph_embeddings: torch.Tensor
+            The embedding matrix, num_nodes x in_features or num_edges x in_features.
+        scope: List[List]
+            One (start, size) pair per graph: `start` is the first row of the
+            graph and `size` is its number of rows. A size of 0 yields zeros.
+
+        Returns
+        -------
+        graph_embeddings: torch.Tensor
+            Stacked readouts of shape len(scope) x in_features for 'mean', or
+            len(scope) x (attn_out_size * in_features) for 'self_attention'.
+        """
+        pooled: List[torch.Tensor] = []
+        empty: List[int] = []  # positions of zero-size scopes
+        ref = None  # a non-empty readout; gives zero rows their shape
+        for a_start, a_size in scope:
+            if a_size == 0:
+                empty.append(len(pooled))
+                pooled.append(self.cached_zero_vector)
+                continue
+            rows = graph_embeddings.narrow(0, a_start, a_size)
+            if self.rtype == "self_attention":
+                rows, _ = self.attn(rows)
+                ref = rows.flatten()
+            else:
+                ref = rows.sum(dim=0) / a_size
+            pooled.append(ref)
+        if ref is not None:
+            # cached_zero_vector is in_features wide, which need not match a
+            # flattened attention readout, so zero rows mirror a real readout.
+            # With no real readout at all, cached_zero_vector is kept as is.
+            for i in empty:
+                pooled[i] = torch.zeros_like(ref)
+        return torch.stack(pooled, dim=0)
diff --git a/deepchem/models/torch_models/tests/test_readout.py b/deepchem/models/torch_models/tests/test_readout.py
@@ -0,0 +1,44 @@
+import pytest
+
+try:
+    import torch
+except ModuleNotFoundError:
+    pass
+
+
+@pytest.mark.torch
+def testGroverReadout():
+    """Check GroverReadout output shapes and values for both readout types."""
+    from deepchem.models.torch_models.readout import GroverReadout
+    n_nodes, n_features = 6, 32
+    readout_mean = GroverReadout(rtype="mean", in_features=n_features)
+
+    # testing a simple scenario where each embedding corresponds to a unique graph
+    embedding = torch.ones(n_nodes, n_features)
+    scope = [(i, 1) for i in range(n_nodes)]
+    readout = readout_mean(embedding, scope)
+    assert readout.shape == (n_nodes, n_features)
+    assert torch.equal(readout, torch.ones(n_nodes, n_features))
+
+    # here embeddings 0, 1 belong to a scope, 2, 3 to another scope and 4, 5 to another scope
+    # thus, we will have 3 graphs
+    n_graphs = n_nodes // 2
+    scope = [(0, 2), (2, 2), (4, 2)]
+    # zero the first node of every graph so that each mean is exactly 0.5
+    embedding[torch.tensor([0, 2, 4])] = 0.0
+    readout = readout_mean(embedding, scope)
+    assert readout.shape == (n_graphs, n_features)
+    assert torch.equal(readout, torch.full((n_graphs, n_features), 0.5))
+
+    # a zero-size scope (graph without nodes) reads out as a zero vector
+    readout = readout_mean(embedding, [(0, 2), (2, 0), (2, 4)])
+    assert torch.equal(readout[1], torch.zeros(n_features))
+
+    attn_out = 8
+    readout_attn = GroverReadout(rtype="self_attention",
+                                 in_features=n_features,
+                                 attn_hidden_size=32,
+                                 attn_out_size=attn_out)
+
+    readout = readout_attn(embedding, scope)
+    assert readout.shape == (n_graphs, attn_out * n_features)
diff --git a/docs/source/api_reference/layers.rst b/docs/source/api_reference/layers.rst
@@ -195,6 +195,12 @@ Attention Layers
 .. autoclass:: deepchem.models.torch_models.attention.SelfAttention
   :members:
 
+Readout Layers
+^^^^^^^^^^^^^^
+
+.. autoclass:: deepchem.models.torch_models.readout.GroverReadout
+   :members:
+
 Jax Layers
 ----------