Resolve #157 Doc cross references not work properly (#159)
* Polish docs
gpengzhi committed Aug 26, 2019
1 parent a1f3ac5 commit 625eea7
Showing 20 changed files with 81 additions and 65 deletions.
1 change: 1 addition & 0 deletions docs/code/core.rst
@@ -4,6 +4,7 @@
Core
****

+ .. _attention-mechanism:

Attention Mechanism
===================
9 changes: 9 additions & 0 deletions docs/code/utils.rst
@@ -188,6 +188,15 @@ Misc
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autofunction:: texar.torch.utils.beam_search.beam_search

+ :hidden:`flatten`
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ .. autofunction:: texar.torch.utils.nest.flatten


+ :hidden:`pack_sequence_as`
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ .. autofunction:: texar.torch.utils.nest.pack_sequence_as



AverageRecorder
2 changes: 1 addition & 1 deletion texar/torch/modules/classifiers/bert_classifier.py
@@ -46,7 +46,7 @@ class BERTClassifier(ClassifierBase, PretrainedBERTMixin):
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``bert-base-uncased``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedBERTMixin` for
+ :class:`~texar.torch.modules.PretrainedBERTMixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
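For context on the `pretrained_model_name` argument documented above, here is a minimal usage sketch. It assumes the standard texar-torch API as documented (constructor plus a forward call returning logits and predictions), that the ``bert-base-uncased`` checkpoint can be downloaded, and that ``num_classes`` is the relevant hyperparameter key; treat it as illustrative rather than a verbatim recipe.

```python
import torch
from texar.torch.modules import BERTClassifier

# Pick a supported pre-trained model by name; if `pretrained_model_name`
# were None, the name in `hparams` would be used instead.
classifier = BERTClassifier(
    pretrained_model_name="bert-base-uncased",
    hparams={"num_classes": 2},  # assumed hparams key for a binary task
)

# A dummy batch of token ids with per-example lengths.
input_ids = torch.randint(0, 30522, (2, 16))
lengths = torch.tensor([16, 12])

logits, preds = classifier(input_ids, sequence_length=lengths)
print(logits.shape, preds.shape)  # expected: (2, 2) and (2,)
```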
2 changes: 1 addition & 1 deletion texar/torch/modules/classifiers/gpt2_classifier.py
@@ -46,7 +46,7 @@ class GPT2Classifier(ClassifierBase, PretrainedGPT2Mixin):
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``gpt2-small``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedGPT2Mixin` for
+ :class:`~texar.torch.modules.PretrainedGPT2Mixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
2 changes: 1 addition & 1 deletion texar/torch/modules/classifiers/xlnet_classifier.py
@@ -44,7 +44,7 @@ class XLNetClassifier(ClassifierBase, PretrainedXLNetMixin):
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``xlnet-based-cased``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedXLNetMixin` for
+ :class:`~texar.torch.modules.PretrainedXLNetMixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
3 changes: 1 addition & 2 deletions texar/torch/modules/connectors/connectors.py
@@ -302,8 +302,7 @@ class ForwardConnector(ConnectorBase):
:attr:`output_size`, or must have the same number of elements and be
re-packable into the structure of :attr:`output_size`. Note that if input
is or contains a ``dict`` instance, the keys will be sorted to pack in
- deterministic order (See :func:`~texar.torch.utils.nest.pack_sequence_as`
- for more details).
+ deterministic order (See :func:`~texar.torch.utils.nest.pack_sequence_as`).
"""

def __init__(self,
2 changes: 1 addition & 1 deletion texar/torch/modules/decoders/decoder_helpers.py
@@ -50,9 +50,9 @@
IDType = TypeVar('IDType', bound=torch.Tensor)


# Helper instances are used by :class:`texar.torch.modules.DecoderBase`.
class Helper(Generic[IDType], ABC):
r"""Interface for implementing sampling in seq2seq decoders.
Helper instances are used by :class:`~texar.torch.DecoderBase`.
Please refer to the documentation for the TensorFlow counterpart
`tf.contrib.seq2seq.Helper
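Since several of the corrected references point at the `Helper` interface, a rough sketch of how a helper typically enters a decoding call may help. It assumes `BasicRNNDecoder` and `create_helper` behave as documented elsewhere in texar-torch; the start/end token ids and the tiny vocabulary are hypothetical.

```python
import torch
from texar.torch.modules import BasicRNNDecoder

# A toy decoder over a 100-token vocabulary with its own token embedder.
embedder = torch.nn.Embedding(num_embeddings=100, embedding_dim=64)
decoder = BasicRNNDecoder(input_size=64, vocab_size=100,
                          token_embedder=embedder)

# Helpers encapsulate the sampling strategy; `create_helper` builds one from
# a decoding-strategy name (assumed API).
helper = decoder.create_helper(
    decoding_strategy="infer_greedy",
    start_tokens=torch.tensor([1, 1]),  # hypothetical BOS id, batch of 2
    end_token=2)                        # hypothetical EOS id

outputs, final_state, lengths = decoder(helper=helper,
                                        max_decoding_length=10)
print(outputs.sample_id.shape)  # at most (2, 10)
```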
8 changes: 4 additions & 4 deletions texar/torch/modules/decoders/gpt2_decoder.py
@@ -32,17 +32,17 @@ class GPT2Decoder(TransformerDecoder, PretrainedGPT2Mixin):
r"""Raw GPT2 Transformer for decoding sequences.
This module basically stacks
- :class:`~texar.torch.modules.embedders.WordEmbedder`,
- :class:`~texar.torch.modules.embedders.PositionEmbedder`,
- :class:`~texar.torch.modules.encoders.TransformerDecoder`.
+ :class:`~texar.torch.modules.WordEmbedder`,
+ :class:`~texar.torch.modules.PositionEmbedder`,
+ :class:`~texar.torch.modules.TransformerDecoder`.
This module supports the architecture first proposed
in `(Radford et al.)` GPT2.
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``gpt2-small``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedGPT2Mixin` for
+ :class:`~texar.torch.modules.PretrainedGPT2Mixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
2 changes: 1 addition & 1 deletion texar/torch/modules/decoders/rnn_decoder_base.py
@@ -139,7 +139,7 @@ def forward(self, # type: ignore
and outputs have the correct values and that backprop ignores
time steps that were marked as finished.
helper (optional): An instance of
- :class:`texar.torch.modules.decoders.Helper`
+ :class:`~texar.torch.modules.Helper`
that defines the decoding strategy. If given,
``decoding_strategy`` and helper configurations in
:attr:`hparams` are ignored.
17 changes: 9 additions & 8 deletions texar/torch/modules/decoders/rnn_decoders.py
@@ -121,8 +121,8 @@ class BasicRNNDecoder(RNNDecoderBase[HiddenState, BasicRNNDecoderOutput]):
specified, you must subclass :class:`BasicRNNDecoder` and
override :meth:`embed_tokens`.
cell (RNNCellBase, optional): An instance of
- :class:`~texar.torch.core.RNNCellBase`. If `None` (default), a cell
- is created as specified in :attr:`hparams`.
+ :class:`~texar.torch.core.cell_wrappers.RNNCellBase`. If `None`
+ (default), a cell is created as specified in :attr:`hparams`.
output_layer (optional): An instance of :torch_nn:`Module`. Apply to
the RNN cell output to get logits. If `None`, a :torch_nn:`Linear`
layer is used with output dimension set to :attr:`vocab_size`.
@@ -280,8 +280,8 @@ class AttentionRNNDecoder(RNNDecoderBase[AttentionWrapperState,
specified, you must subclass :class:`AttentionRNNDecoder` and
override :meth:`embed_tokens`.
cell (RNNCellBase, optional): An instance of
- :class:`~texar.torch.core.RNNCellBase`. If `None`, a cell
- is created as specified in :attr:`hparams`.
+ :class:`~texar.torch.core.cell_wrappers.RNNCellBase`. If `None`,
+ a cell is created as specified in :attr:`hparams`.
output_layer (optional): An output layer that transforms cell output
to logits. This can be:
@@ -469,9 +469,10 @@ def default_hparams():
`"type"`: str or class or instance
The attention type. Can be an attention class, its name or
module path, or a class instance. The class must be a subclass
- of :class:`~texar.torch.core.AttentionMechanism`. If class name
- is given, the class must be from modules :mod:`texar.torch.core`
- or :mod:`texar.torch.custom`.
+ of ``AttentionMechanism``. See :ref:`attention-mechanism` for
+ all supported attention mechanisms. If class name is given,
+ the class must be from modules
+ :mod:`texar.torch.core` or :mod:`texar.torch.custom`.
Example:
@@ -635,7 +636,7 @@ def forward( # type: ignore
initial_state (optional): Initial state of decoding.
If `None` (default), zero state is used.
helper (optional): An instance of
- :class:`texar.torch.modules.decoders.Helper`
+ :class:`~texar.torch.modules.Helper`
that defines the decoding strategy. If given,
``decoding_strategy`` and helper configurations in
:attr:`hparams` are ignored.
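The reworked `"type"` documentation above is easier to follow with a concrete hyperparameter dict. The sketch below assumes the constructor takes `input_size`, `encoder_output_size`, `vocab_size`, and `token_embedder` as documented, that unspecified hyperparameters fall back to defaults, and that `forward` accepts the encoder outputs as `memory`; the dimensions and token ids are arbitrary placeholders.

```python
import torch
from texar.torch.modules import AttentionRNNDecoder

embedder = torch.nn.Embedding(100, 64)

# "type" names an attention class; per the docstring it may also be the
# class itself or an already-constructed instance.
hparams = {
    "attention": {
        "type": "LuongAttention",
        "kwargs": {"num_units": 256},
    }
}
decoder = AttentionRNNDecoder(
    input_size=64, encoder_output_size=256, vocab_size=100,
    token_embedder=embedder, hparams=hparams)

# Teacher-forcing style call with fake encoder outputs ("memory").
memory = torch.randn(2, 7, 256)          # (batch, source_len, encoder_dim)
outputs, final_state, lengths = decoder(
    memory=memory, memory_sequence_length=torch.tensor([7, 5]),
    inputs=torch.randint(0, 100, (2, 6)),
    sequence_length=torch.tensor([6, 4]))
```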
10 changes: 5 additions & 5 deletions texar/torch/modules/decoders/transformer_decoders.py
@@ -60,7 +60,7 @@ class TransformerDecoder(DecoderBase[Cache, TransformerDecoderOutput]):
sequence decoding.
It is a stack of
- :class:`~texar.torch.modules.encoders.MultiheadAttentionEncoder`,
+ :class:`~texar.torch.modules.MultiheadAttentionEncoder`,
:class:`~texar.torch.modules.FeedForwardNetwork`, and residual connections.
Args:
@@ -334,7 +334,7 @@ def forward(self, # type: ignore
r"""Performs decoding.
The interface is very similar to that of RNN decoders
- (:class:`texar.torch.modules.RNNDecoderBase`). In particular,
+ (:class:`~texar.torch.modules.RNNDecoderBase`). In particular,
the function provides **3 ways** to specify the decoding method, with
varying flexibility:
@@ -360,14 +360,14 @@ def forward(self, # type: ignore
:attr:`beam_width` are both `None`.
2. The :attr:`helper` argument: An instance of subclass of
- :class:`texar.torch.modules.decoders.Helper`.
+ :class:`~texar.torch.modules.Helper`.
This provides a superset of decoding strategies than above.
The interface is the same as in RNN decoders.
Please refer to :meth:`texar.torch.modules.RNNDecoderBase.forward`
for detailed usage and examples.
Note that, here, though using a
- :class:`~texar.torch.decoder.TrainingHelper` corresponding to the
+ :class:`~texar.torch.modules.TrainingHelper` corresponding to the
``"train_greedy"`` strategy above, the implementation is *slower*
than directly setting ``decoding_strategy="train_greedy"`` (though
output results are the same).
@@ -439,7 +439,7 @@ def forward(self, # type: ignore
time steps that were marked as finished. Ignored in
``"train_greedy"`` decoding.
helper (optional): An instance of
- :class:`texar.torch.modules.decoders.Helper`
+ :class:`~texar.torch.modules.Helper`
that defines the decoding strategy. If given,
``decoding_strategy`` and helper configurations in
:attr:`hparams` are ignored.
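To make the "3 ways" of specifying the decoding method concrete, here is a hedged sketch of the first and third options (a named strategy versus beam search). It assumes a `token_pos_embedder` callable with signature `(tokens, positions)`, the default model dimension of 512, that inference strategies accept `start_tokens`/`end_token`, and that beam search returns a dict; the BOS/EOS ids are placeholders, not real vocabulary entries.

```python
import torch
from texar.torch.modules import TransformerDecoder

vocab_size, dim = 100, 512
embedding = torch.nn.Embedding(vocab_size, dim)

def token_pos_embedder(tokens, positions):
    # A real model would add a position embedding here; omitted for brevity.
    return embedding(tokens)

decoder = TransformerDecoder(token_pos_embedder=token_pos_embedder,
                             vocab_size=vocab_size)
decoder.eval()

memory = torch.randn(2, 7, dim)           # fake encoder outputs
memory_len = torch.tensor([7, 5])
start_tokens = torch.tensor([1, 1])       # placeholder BOS ids

# Way 1: pick a basic strategy by name.
outputs, lengths = decoder(
    memory=memory, memory_sequence_length=memory_len,
    decoding_strategy="infer_greedy",
    start_tokens=start_tokens, end_token=2, max_decoding_length=10)

# Way 3: beam search; the result is assumed to be a dict holding
# "sample_id" and "log_prob".
beam = decoder(
    memory=memory, memory_sequence_length=memory_len,
    beam_width=4,
    start_tokens=start_tokens, end_token=2, max_decoding_length=10)
```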
4 changes: 2 additions & 2 deletions texar/torch/modules/decoders/xlnet_decoder.py
@@ -51,7 +51,7 @@ class XLNetDecoder(XLNetEncoder, DecoderBase[Optional[State], Output]):
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``xlnet-based-cased``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedXLNetMixin` for
+ :class:`~texar.torch.modules.PretrainedXLNetMixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
@@ -318,7 +318,7 @@ def forward(self, # type: ignore
more expensive. Defaults to `True`.
print_steps (bool): If `True`, will print decoding progress.
helper: Type (or name of the type) of any sub-class of
- :class:`~texar.torch.modules.decoders.Helper`.
+ :class:`~texar.torch.modules.Helper`.
helper_kwargs: The keyword arguments to pass to constructor of
the specific helper type.
2 changes: 1 addition & 1 deletion texar/torch/modules/embedders/embedders.py
@@ -252,7 +252,7 @@ def vocab_size(self) -> int:
@property
def num_embeddings(self) -> int:
r"""The vocabulary size. This interface matches
- :class:`~torch.nn.Embedding`.
+ :torch_nn:`Embedding`.
"""
return self._vocab_size

10 changes: 5 additions & 5 deletions texar/torch/modules/encoders/bert_encoder.py
@@ -36,15 +36,15 @@ class BERTEncoder(EncoderBase, PretrainedBERTMixin):
r"""Raw BERT Transformer for encoding sequences.
This module basically stacks
- :class:`~texar.torch.modules.embedders.WordEmbedder`,
- :class:`~texar.torch.modules.embedders.PositionEmbedder`,
- :class:`~texar.torch.modules.encoders.TransformerEncoder` and a dense
+ :class:`~texar.torch.modules.WordEmbedder`,
+ :class:`~texar.torch.modules.PositionEmbedder`,
+ :class:`~texar.torch.modules.TransformerEncoder` and a dense
pooler.
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``bert-base-uncased``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedBERTMixin` for
+ :class:`~texar.torch.modules.PretrainedBERTMixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
@@ -201,7 +201,7 @@ def default_hparams():
`"encoder"`: dict
Hyperparameters for the TransformerEncoder.
- See :func:`~texar.torch.modules.TransformerEncoder.default_harams`
+ See :func:`~texar.torch.modules.TransformerEncoder.default_hparams`
for details.
`"hidden_size"`: int
10 changes: 5 additions & 5 deletions texar/torch/modules/encoders/gpt2_encoder.py
@@ -33,14 +33,14 @@ class GPT2Encoder(TransformerEncoder, PretrainedGPT2Mixin):
r"""Raw GPT2 Transformer for encoding sequences.
This module basically stacks
- :class:`~texar.torch.modules.embedders.WordEmbedder`,
- :class:`~texar.torch.modules.embedders.PositionEmbedder`,
- :class:`~texar.torch.modules.encoders.TransformerEncoder`.
+ :class:`~texar.torch.modules.WordEmbedder`,
+ :class:`~texar.torch.modules.PositionEmbedder`,
+ :class:`~texar.torch.modules.TransformerEncoder`.
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``gpt2-small``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedGPT2Mixin` for
+ :class:`~texar.torch.modules.PretrainedGPT2Mixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
@@ -179,7 +179,7 @@ def default_hparams():
`"decoder"`: dict
Hyperparameters for the TransformerDecoder.
- See :func:`~texar.torch.modules.TransformerDecoder.default_harams`
+ See :func:`~texar.torch.modules.TransformerDecoder.default_hparams`
for details.
`"initializer"`: dict, optional
2 changes: 1 addition & 1 deletion texar/torch/modules/encoders/transformer_encoder.py
@@ -123,7 +123,7 @@ class TransformerEncoder(EncoderBase):
sequences.
This module basically stacks
- :class:`~texar.torch.modules.encoders.MultiheadAttentionEncoder`,
+ :class:`~texar.torch.modules.MultiheadAttentionEncoder`,
:class:`~texar.torch.modules.FeedForwardNetwork` and residual connections.
This module supports two types of architectures, namely, the standard
Transformer Encoder architecture first proposed in
2 changes: 1 addition & 1 deletion texar/torch/modules/encoders/xlnet_encoder.py
@@ -39,7 +39,7 @@ class XLNetEncoder(EncoderBase, PretrainedXLNetMixin):
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``xlnet-based-cased``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedXLNetMixin` for
+ :class:`~texar.torch.modules.PretrainedXLNetMixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
3 changes: 1 addition & 2 deletions texar/torch/modules/pretrained/pretrained_base.py
@@ -181,8 +181,7 @@ def download_checkpoint(cls, pretrained_model_name: str,
Args:
pretrained_model_name (str): Name of the model checkpoint.
cache_dir (str, optional): Path to the cache directory. If `None`,
- uses the default directory given by
- :meth:`~default_download_dir`.
+ uses the default directory (user's home directory).
Returns:
Path to the cache directory.
2 changes: 1 addition & 1 deletion texar/torch/modules/regressors/xlnet_regressor.py
@@ -44,7 +44,7 @@ class XLNetRegressor(RegressorBase):
Args:
pretrained_model_name (optional): a `str`, the name
of pre-trained model (e.g., ``xlnet-based-cased``). Please refer to
- :class:`~texar.torch.modules.pretrained.PretrainedXLNetMixin` for
+ :class:`~texar.torch.modules.PretrainedXLNetMixin` for
all supported models.
If `None`, the model name in :attr:`hparams` is used.
cache_dir (optional): the path to a folder in which the
53 changes: 30 additions & 23 deletions texar/torch/utils/nest.py
@@ -31,22 +31,25 @@ def is_sequence(seq: Any) -> bool:
def flatten(structure: NestedStructure) -> List[Any]:
r"""Returns a flat list from a given nested structure.
If nest is not a sequence, tuple, or dict, then returns a single-element
- list:[nest].
+ `list:[nest]`.
In the case of dict instances, the sequence consists of the values,
sorted by key to ensure deterministic behavior. This is true also for
- OrderedDict instances: their sequence order is ignored, the sorting order
+ `OrderedDict` instances: their sequence order is ignored, the sorting order
of keys is used instead. The same convention is followed in
- pack_sequence_as. This correctly repacks dicts and OrderedDicts after
- they have been flattened, and also allows flattening an OrderedDict
- and then repacking it back using a corresponding plain dict,
- or vice-versa. Dictionaries with non-sortable keys cannot be flattened.
- Users must not modify any collections used in nest while this function is
- running.
+ :func:`~texar.torch.utils.nest.pack_sequence_as`. This correctly repacks
+ dictionaries and `OrderedDict`s after they have been flattened, and also
+ allows flattening an `OrderedDict` and then repacking it back using a
+ corresponding plain dict, or vice-versa. Dictionaries with non-sortable
+ keys cannot be flattened. Users must not modify any collections used in
+ nest while this function is running.
Args:
structure: an arbitrarily nested structure or a scalar object. Note,
numpy arrays are considered scalars.
numpy arrays are considered scalars.
Returns:
A Python list, the flattened version of the input.
Raises:
TypeError: The nest is or contains a dict with non-sortable keys.
"""
@@ -67,28 +70,32 @@ def pack_sequence_as(structure: NestedStructure,
flat_sequence: Union[List, Tuple]
) -> NestedStructure:
r"""Returns a given flattened sequence packed into a given structure.
- If `structure` is a scalar, `flat_sequence` must be a single-element list;
- in this case the return value is `flat_sequence[0]`.
- If `structure` is or contains a dict instance, the keys will be sorted to
+ If ``structure`` is a scalar, ``flat_sequence`` must be a single-element
+ list; in this case the return value is ``flat_sequence[0]``.
+ If ``structure`` is or contains a dict instance, the keys will be sorted to
pack the flat sequence in deterministic order. This is true also for
`OrderedDict` instances: their sequence order is ignored, the sorting
order of keys is used instead. The same convention is followed in
- `flatten`. This correctly repacks dicts and `OrderedDict`s after they
- have been flattened, and also allows flattening an `OrderedDict` and
- then repacking it back using a corresponding plain dict, or vice-versa.
- Dictionaries with non-sortable keys cannot be flattened.
+ :func:`~texar.torch.utils.nest.flatten`. This correctly repacks dictionaries
+ and `OrderedDicts` after they have been flattened, and also allows
+ flattening an `OrderedDict` and then repacking it back using a
+ corresponding plain dict, or vice-versa. Dictionaries with non-sortable
+ keys cannot be flattened.
Args:
structure: Nested structure, whose structure is given by nested lists,
- tuples, and dicts. Note: numpy arrays and strings are considered
- scalars.
+ tuples, and dictionaries. Note: numpy arrays and strings are
+ considered scalars.
flat_sequence: flat sequence to pack.
Returns:
- packed: `flat_sequence` converted to have the same recursive
- structure as `structure`.
+ packed: ``flat_sequence`` converted to have the same recursive
+ structure as ``structure``.
Raises:
- ValueError: If `flat_sequence` and `structure` have different
- element counts.
- TypeError: `structure` is or contains a dict with non-sortable keys.
+ ValueError: If ``flat_sequence`` and ``structure`` have different
+ element counts.
+ TypeError: ``structure`` is or contains a dict with non-sortable keys.
"""
if not is_sequence(flat_sequence):
raise TypeError("flat_sequence must be a sequence")
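The key-sorting convention spelled out in both docstrings above is easiest to see with a tiny round trip. The sketch below relies only on the behavior the docstrings themselves describe.

```python
from collections import OrderedDict
from texar.torch.utils import nest

# Values are flattened in key-sorted order, regardless of insertion order.
structure = OrderedDict([("b", [1, 2]), ("a", 3)])
flat = nest.flatten(structure)
print(flat)  # [3, 1, 2] -- "a" sorts before "b"

# Repacking follows the same convention, so a plain dict with the same keys
# works as the target structure.
packed = nest.pack_sequence_as({"a": None, "b": [None, None]}, flat)
print(packed)  # {'a': 3, 'b': [1, 2]}
```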
