
Commit

Merge a314e5e into 36b721b
OriolAbril committed Nov 17, 2019
2 parents 36b721b + a314e5e commit 47c26c3
Showing 14 changed files with 10,298 additions and 93 deletions.
12 changes: 9 additions & 3 deletions arviz/data/inference_data.py
@@ -153,7 +153,7 @@ def __add__(self, other):
"""Concatenate two InferenceData objects."""
return concat(self, other, copy=True, inplace=False)

def sel(self, inplace=False, **kwargs):
def sel(self, inplace=False, chain_prior=False, **kwargs):
"""Perform an xarray selection on all groups.
Loops over all groups to perform Dataset.sel(key=item)
@@ -165,8 +165,12 @@ def sel(self, inplace=False, **kwargs):
Parameters
----------
inplace : bool
If True, modify the InferenceData object inplace, otherwise, return the modified copy.
inplace : bool, optional
If ``True``, modify the InferenceData object inplace,
otherwise, return the modified copy.
chain_prior : bool, optional
If ``False``, do not select prior-related groups using the ``chain`` dim.
Otherwise, apply the ``chain`` selection to those groups as well when the dim is present.
**kwargs : mapping
It must be accepted by Dataset.sel()
@@ -204,6 +208,8 @@ def sel(self, inplace=False, **kwargs):
for group in self._groups:
dataset = getattr(self, group)
valid_keys = set(kwargs.keys()).intersection(dataset.dims)
if not chain_prior and "prior" in group:
valid_keys -= {"chain"}
dataset = dataset.sel(**{key: kwargs[key] for key in valid_keys})
setattr(out, group, dataset)
if inplace:
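The new keyword only affects groups whose name contains "prior" (e.g. prior, prior_predictive), which may have been sampled with a different number of chains than the posterior. A minimal usage sketch, assuming the bundled centered_eight example dataset used in the test below:

import arviz as az

idata = az.load_arviz_data("centered_eight")

# Default behaviour (chain_prior=False): the chain selection is skipped
# for prior-related groups, so the call succeeds even though those groups
# do not hold all of the requested chains.
subset = idata.sel(chain=[0, 1, 3], chain_prior=False)

# Opting in applies the selection to prior-related groups too; for
# centered_eight this raises a KeyError because the requested chains are
# missing there (exactly what test_sel_method_chain_prior checks).
idata.sel(chain=[0, 1, 3], chain_prior=True)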
19 changes: 19 additions & 0 deletions arviz/tests/test_data.py
@@ -333,6 +333,25 @@ def test_sel_method(inplace):
assert np.all(dataset.draw.values == np.arange(200, ndraws))


def test_sel_method_chain_prior():
idata = load_arviz_data("centered_eight")
original_groups = getattr(idata, "_groups")
idata_subset = idata.sel(inplace=False, chain_prior=False, chain=[0, 1, 3])
groups = getattr(idata_subset, "_groups")
assert np.all(np.isin(groups, original_groups))
for group in groups:
dataset_subset = getattr(idata_subset, group)
dataset = getattr(idata, group)
if "chain" in dataset.dims:
assert "chain" in dataset_subset.dims
if "prior" not in group:
assert np.all(dataset_subset.chain.values == np.array([0, 1, 3]))
else:
assert "chain" not in dataset_subset.dims
with pytest.raises(KeyError):
idata.sel(inplace=False, chain_prior=True, chain=[0, 1, 3])


@pytest.mark.parametrize("use", ("del", "delattr"))
def test_del_method(use):
# create inference data object
5 changes: 3 additions & 2 deletions doc/conf.py
@@ -54,6 +54,7 @@
"matplotlib.sphinxext.plot_directive",
"numpydoc",
"nbsphinx",
"m2r",
"IPython.sphinxext.ipython_directive",
"IPython.sphinxext.ipython_console_highlighting",
"gallery_generator",
@@ -79,8 +80,8 @@
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
# source_suffix = ['.rst', '.md']
source_suffix = ".rst"
source_suffix = ['.rst', '.md']
# source_suffix = ".rst"

# The master toctree document.
master_doc = "index"
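With m2r registered as an extension and ".md" added to source_suffix, Sphinx can now build Markdown sources alongside the existing reStructuredText pages (presumably for the new InferenceData schema specification added in this commit). A minimal sketch of the two conf.py settings this change touches, with the rest of the extension list elided:

# doc/conf.py (excerpt)
extensions = [
    # ... existing extensions ...
    "m2r",  # parse Markdown sources through m2r
]
source_suffix = [".rst", ".md"]  # accept both reST and Markdown files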
6 changes: 5 additions & 1 deletion doc/index.rst
@@ -25,7 +25,11 @@ ArviZ: Exploratory analysis of Bayesian models

ArviZ is a Python package for exploratory analysis of Bayesian models. It includes functions for posterior analysis, sample diagnostics, model checking, and comparison.

The goal is to provide backend-agnostic tools for diagnostics and visualizations of Bayesian inference in Python, by first converting inference data into `xarray <https://xarray.pydata.org/en/stable/>`_ objects. See :doc:`here <notebooks/XarrayforArviZ>` for more on xarray and ArviZ.
The goal is to provide backend-agnostic tools for diagnostics and visualizations of Bayesian inference in Python,
by first converting inference data into `xarray <https://xarray.pydata.org/en/stable/>`_ objects.
See :doc:`here <notebooks/XarrayforArviZ>` for more on xarray and ArviZ usage,
and :doc:`here <schema/schema>` for more on the ``InferenceData`` structure
and specification.



2 changes: 1 addition & 1 deletion doc/notebooks/InferenceDataCookbook.ipynb
@@ -5,7 +5,7 @@
"metadata": {},
"source": [
"# Inference Data Cookbook\n",
"`InferenceData` is the central data format for ArviZ. `InferenceData` itself is just a container that maintains references to one or more `xarray.Dataset`. Below are various ways to generate an `InferenceData` object. See [here](XarrayforArviZ.ipynb) for more on xarray."
"`InferenceData` is the central data format for ArviZ. `InferenceData` itself is just a container that maintains references to one or more `xarray.Dataset`. See the `InferenceData` structure specification [here](../schema/schema.html). Below are various ways to generate an `InferenceData` object. See [here](XarrayforArviZ.ipynb) for more on xarray."
]
},
{
4 changes: 2 additions & 2 deletions doc/notebooks/Introduction.ipynb
@@ -184,7 +184,7 @@
"\n",
"For much more powerful querying, analysis and plotting, we can use built-in ArviZ utilities to convert PyMC3 objects to xarray datasets. Note we are also giving some information about labelling.\n",
"\n",
"ArviZ is built to work with `InferenceData`, and the more *groups* it has access to, the more powerful analyses it can perform. Here is a plot of the trace, which is common in PyMC3 workflows. Note the intelligent labels."
"ArviZ is built to work with `InferenceData`, and the more *groups* it has access to, the more powerful analyses it can perform. See the `InferenceData` structure specification [here](../schema/schema.html). Here is a plot of the trace, which is common in PyMC3 workflows. Note the intelligent labels."
]
},
{
@@ -470,7 +470,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.8"
}
},
"nbformat": 4,
4 changes: 3 additions & 1 deletion doc/notebooks/XarrayforArviZ.ipynb
@@ -40,6 +40,8 @@
"\n",
"</ul> \n",
"\n",
"See the `InferenceData` structure specification [here](../schema/schema.html).\n",
"\n",
"\n",
"## Why not Pandas Dataframes or Numpy Arrays?\n",
"Data from probabilistic programming is naturally high dimensional. To add to the complexity ArviZ must handle the data generated from multiple Bayesian Modeling libraries, such as PyMC3 and PyStan. This is an application that the *xarray* package handles quite well. The xarray package lets users manage high dimensional data with human readable dimensions and coordinates quite easily.\n",
@@ -259,7 +261,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
"version": "3.6.8"
}
},
"nbformat": 4,
