Merge pull request #111 from Autoplectic/functional_common_information

Functional common information
dit · May 27, 2016 · d425c46 · d425c46
2 parents 755f7bc + 0c4a149
commit d425c46
Show file tree

Hide file tree

Showing 19 changed files with 441 additions and 64 deletions.
diff --git a/dit/abc.py b/dit/abc.py
@@ -21,9 +21,10 @@
                               caekl_mutual_information as J,
                               coinformation as I,
                               entropy as H,
+                              functional_common_information as F,
                               gk_common_information as K,
                               interaction_information as II,
-                              joint_mss_entropy as M,
+                              mss_common_information as M,
                               residual_entropy as R,
                               total_correlation as T,
                               tse_complexity as TSE,
@@ -40,7 +41,7 @@
     'H',    # the joint conditional entropy
     'I',    # the multivariate conditional mutual information
     'T',    # the conditional total correlation [multi-information/integration]
-    'B',    # the conditional binding information [dual total correlation]
+    'B',    # the conditional dual total correlation [binding information]
     'R',    # the conditional residual entropy [erasure entropy]
     'TSE',  # the TSE complexity
 ]
@@ -50,6 +51,7 @@
     'J',    # the CAEKL common information
     'K',    # the Gacs-Korner common information [meet entropy]
     'II',   # the interaction information
+    'F',    # the functional common information
     'M',    # the minimal sufficient statistic common information [joint MSS entropy]
 ]
 

diff --git a/dit/multivariate/__init__.py b/dit/multivariate/__init__.py
@@ -3,12 +3,16 @@
 measures and others are more distantly related.
 """
 
-from .binding_information import binding_information, dual_total_correlation, residual_entropy, variation_of_information
 from .caekl_mutual_information import caekl_mutual_information
 from .coinformation import coinformation
+from .dual_total_correlation import (binding_information,
+                                     dual_total_correlation,
+                                     independent_information, residual_entropy,
+                                     variation_of_information)
 from .entropy import entropy
+from .functional_common_information import functional_common_information
 from .gk_common_information import gk_common_information
 from .interaction_information import interaction_information
-from .joint_mss import joint_mss_entropy
+from .mss_common_information import mss_common_information
 from .total_correlation import total_correlation
 from .tse_complexity import tse_complexity
diff --git a/dit/multivariate/binding_information.py → dit/multivariate/dual_total_correlation.py b/dit/multivariate/binding_information.py → dit/multivariate/dual_total_correlation.py
@@ -1,27 +1,29 @@
 """
-The binding information and residual entropy.
+The dual total correlation and variation of information.
 """
 
 from ..shannon import conditional_entropy as H
 from ..helpers import normalize_rvs
 
 __all__ = ('binding_information',
            'dual_total_correlation',
+           'independent_information',
            'residual_entropy',
+           'variation_of_information',
           )
 
-def binding_information(dist, rvs=None, crvs=None, rv_mode=None):
+def dual_total_correlation(dist, rvs=None, crvs=None, rv_mode=None):
     """
-    Calculates the binding information, also known as the dual total
-    correlation.
+    Calculates the dual total correlation, also known as the binding
+    information.
 
     Parameters
     ----------
     dist : Distribution
-        The distribution from which the binding information is calculated.
+        The distribution from which the dual total correlation is calculated.
     rvs : list, None
         The indexes of the random variable used to calculate the binding
-        information. If None, then the binding information is calculated
+        information. If None, then the dual total correlation is calculated
         over all random variables.
     crvs : list, None
         The indexes of the random variables to condition on. If None, then no
@@ -37,7 +39,7 @@ def binding_information(dist, rvs=None, crvs=None, rv_mode=None):
     Returns
     -------
     B : float
-        The binding information.
+        The dual total correlation.
 
     Raises
     ------
@@ -59,6 +61,8 @@ def binding_information(dist, rvs=None, crvs=None, rv_mode=None):
 
 def residual_entropy(dist, rvs=None, crvs=None, rv_mode=None):
     """
+    Compute the residual entropy.
+
     Parameters
     ----------
     dist : Distribution
@@ -99,6 +103,6 @@ def residual_entropy(dist, rvs=None, crvs=None, rv_mode=None):
     return R
 
 
-dual_total_correlation = binding_information
+binding_information = dual_total_correlation
 
-variation_of_information = residual_entropy
+independent_information = variation_of_information = residual_entropy
diff --git a/dit/multivariate/functional_common_information.py b/dit/multivariate/functional_common_information.py
@@ -0,0 +1,192 @@
+"""
+The functional common information.
+"""
+
+from collections import deque
+from itertools import combinations
+
+from ..distconst import insert_rvf, modify_outcomes
+from ..helpers import flatten, normalize_rvs
+from ..math import close
+from ..utils import partitions
+
+from .entropy import entropy
+from .dual_total_correlation import dual_total_correlation
+
+__all__ = ['functional_common_information']
+
+def add_partition(dist, part):
+    """
+    Append to `dist` the random variable induced by a partition of its
+    outcomes.
+
+    Parameters
+    ----------
+    dist : Distribution
+        The distribution to extend.
+    part : iterable of iterables
+        A partition of the outcomes of `dist`. Every outcome is sent to the
+        position, in iteration order, of the partition element holding it.
+
+    Returns
+    -------
+    dist : Distribution
+        The result of `insert_rvf` applied to `dist` with the function defined
+        by `part`.
+    """
+    # Invert the partition: outcome -> 1-tuple holding the index of its block.
+    block_of = {}
+    for index, block in enumerate(part):
+        for outcome in block:
+            block_of[outcome] = (index,)
+    return insert_rvf(dist, lambda outcome: block_of[outcome])
+
+def functional_markov_chain_naive(dist, rvs=None, crvs=None, rv_mode=None): # pragma: no cover
+    """
+    Brute-force construction of the smallest function of `dist` which renders
+    `rvs` independent: every partition of the outcomes is tried.
+
+    Parameters
+    ----------
+    dist : Distribution
+        The distribution for which the smallest function will be constructed.
+    rvs : list, None
+        A list of lists. Each inner list specifies the indexes of the random
+        variables forming one of the groups to be rendered independent. If
+        None, then all random variables are used, which is equivalent to
+        passing `rvs=dist.rvs`.
+    crvs : list, None
+        A single list of indexes specifying the random variables to condition
+        on. If None, then no variables are conditioned on.
+    rv_mode : str, None
+        Specifies how to interpret `rvs` and `crvs`. Valid options are:
+        {'indices', 'names'}. If equal to 'indices', then the elements of
+        `crvs` and `rvs` are interpreted as random variable indices. If equal
+        to 'names', then the elements are interpreted as random variable
+        names. If `None`, then the value of `dist._rv_mode` is consulted,
+        which defaults to 'indices'.
+
+    Returns
+    -------
+    d : Distribution
+        The distribution `dist` with the additional variable added to the end.
+    """
+    rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode)
+    all_outcomes = dist.outcomes
+    # Index used below to address the variable appended by `add_partition`.
+    new_rv = [len(dist.rvs)]
+
+    def conditional_dtc(candidate):
+        # Dual total correlation of `rvs` given `crvs` and the new variable.
+        return dual_total_correlation(candidate, rvs, crvs + new_rv, rv_mode)
+
+    def new_rv_entropy(candidate):
+        # Entropy of the new variable alone.
+        return entropy(candidate, rvs=new_rv, rv_mode=rv_mode)
+
+    # Build every candidate first, then keep those where the conditional dual
+    # total correlation vanishes, and finally pick the lowest-entropy one.
+    candidates = []
+    for part in partitions(all_outcomes):
+        candidates.append(add_partition(dist, part))
+
+    markov = [c for c in candidates if close(conditional_dtc(c), 0)]
+    return min(markov, key=new_rv_entropy)
+
+def functional_markov_chain(dist, rvs=None, crvs=None, rv_mode=None):
+    """
+    Add the smallest function of `dist` which renders `rvs` independent.
+
+    The search starts from the partition putting every outcome in its own
+    block and repeatedly merges pairs of blocks, keeping the lowest-entropy
+    partition whose induced variable W makes the dual total correlation of
+    `rvs` given `crvs` and W vanish.
+
+    Parameters
+    ----------
+    dist : Distribution
+        The distribution for which the smallest function will be constructed.
+    rvs : list, None
+        A list of lists. Each inner list specifies the indexes of the random
+        variables forming one of the groups to be rendered independent. If
+        None, then all random variables are used, which is equivalent to
+        passing `rvs=dist.rvs`.
+    crvs : list, None
+        A single list of indexes specifying the random variables to condition
+        on. If None, then no variables are conditioned on.
+    rv_mode : str, None
+        Specifies how to interpret `rvs` and `crvs`. Valid options are:
+        {'indices', 'names'}. If equal to 'indices', then the elements of
+        `crvs` and `rvs` are interpreted as random variable indices. If equal
+        to 'names', then the elements are interpreted as random variable
+        names. If `None`, then the value of `dist._rv_mode` is consulted,
+        which defaults to 'indices'.
+
+    Returns
+    -------
+    d : Distribution
+        The distribution `dist` with the additional variable added to the end.
+
+    Notes
+    -----
+    The implementation of this function is quite slow. It is approximately
+    doubly exponential in the size of the sample space. This method is several
+    times faster than the naive method however. It remains an open question as
+    to whether a method to directly construct this variable exists (as it does
+    with the GK common variable, minimal sufficient statistic, etc).
+    """
+    rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode)
+
+    # Rebuild the distribution with every outcome converted to a tuple.
+    dist = modify_outcomes(dist, lambda x: tuple(x))
+
+    # Finest partition: each outcome sits alone in its own block.
+    part = frozenset([ frozenset([o]) for o in dist.outcomes ]) # make copy
+
+    # Index used to address the variable W appended by `add_partition`.
+    W = [dist.outcome_length()]
+
+    # H: entropy of W alone; B: dual total correlation of `rvs` given
+    # `crvs` together with W.
+    H = lambda d: entropy(d, W, rv_mode=rv_mode)
+    B = lambda d: dual_total_correlation(d, rvs, crvs+W, rv_mode)
+
+    # Dual total correlation of the unmodified distribution; the loop stops
+    # as soon as a valid W has this entropy.
+    # NOTE(review): presumably this is a lower bound on the entropy of any
+    # valid W -- confirm against the reference for this measure.
+    optimal_b = dual_total_correlation(dist, rvs, crvs, rv_mode)
+
+    # Best (entropy, distribution) seen so far, seeded with the finest
+    # partition.
+    initial = add_partition(dist, part)
+    optimal = (H(initial), initial)
+
+    # Pending partitions. Items are taken from the left and new ones are also
+    # pushed on the left, so the most recently generated are examined first.
+    queue = deque([part])
+
+    # Partitions that have already been popped from the queue.
+    checked = set()
+
+    while queue: # pragma: no branch
+        part = queue.popleft()
+
+        checked.add(part)
+
+        d = add_partition(dist, part)
+
+        # Only partitions whose W renders `rvs` independent are scored and
+        # expanded; any other partition is dropped together with everything
+        # that would be obtained by merging its blocks further.
+        if close(B(d), 0):
+
+            h = H(d)
+
+            # `<=` means a later partition of equal entropy replaces the
+            # current best.
+            if h <= optimal[0]:
+                optimal = (h, d)
+
+            if close(h, optimal_b):
+                break
+
+            # Every partition reachable by merging exactly two blocks.
+            new_parts = [frozenset([ p for p in part if p not in pair ] +
+                                   [pair[0]|pair[1]])
+                         for pair in combinations(part, 2) ]
+            # Drop those already popped and order by sorted block sizes;
+            # `extendleft` inserts items one by one, so this order ends up
+            # reversed at the front of the queue.
+            new_parts = sorted([ part for part in new_parts if part not in checked ], key=lambda p: sorted(map(len, p)))
+            queue.extendleft(new_parts)
+
+    return optimal[1]
+
+def functional_common_information(dist, rvs=None, crvs=None, rv_mode=None):
+    """
+    Compute the functional common information, F, of `dist`: the entropy of
+    the smallest random variable W which is a function of `rvs` and which,
+    when conditioned on, renders all the variables in `rvs` independent.
+
+    Parameters
+    ----------
+    dist : Distribution
+        The distribution from which the functional common information is
+        computed.
+    rvs : list, None
+        A list of lists. Each inner list specifies the indexes of the random
+        variables forming one of the groups to be rendered independent. If
+        None, then all random variables are used, which is equivalent to
+        passing `rvs=dist.rvs`.
+    crvs : list, None
+        A single list of indexes specifying the random variables to condition
+        on. If None, then no variables are conditioned on.
+    rv_mode : str, None
+        Specifies how to interpret `rvs` and `crvs`. Valid options are:
+        {'indices', 'names'}. If equal to 'indices', then the elements of
+        `crvs` and `rvs` are interpreted as random variable indices. If equal
+        to 'names', then the elements are interpreted as random variable
+        names. If `None`, then the value of `dist._rv_mode` is consulted,
+        which defaults to 'indices'.
+
+    Returns
+    -------
+    F : float
+        The functional common information.
+    """
+    markov = functional_markov_chain(dist, rvs, crvs, rv_mode)
+    # W was appended after the original variables, so it is addressed by the
+    # outcome length of the original distribution.
+    # NOTE(review): neither `rv_mode` nor `crvs` is forwarded to `entropy`
+    # here -- confirm this is intended.
+    w_index = dist.outcome_length()
+    return entropy(markov, [w_index])
diff --git a/dit/multivariate/joint_mss.py → dit/multivariate/mss_common_information.py b/dit/multivariate/joint_mss.py → dit/multivariate/mss_common_information.py
@@ -1,13 +1,16 @@
 """
-Compute the entropy of the joint minimal sufficient statistic.
+Compute the minimal sufficient statistic common information.
 """
 
 from ..algorithms.minimal_sufficient_statistic import insert_joint_mss
+from ..helpers import normalize_rvs
 from .entropy import entropy
 
-def joint_mss_entropy(dist, rvs=None, crvs=None, rv_mode=None):
+def mss_common_information(dist, rvs=None, crvs=None, rv_mode=None):
     """
-    Compute the entropy of the join of the minimal sufficent statistic of each variable about the others.
+    Compute the minimal sufficient statistic common information, which is the
+    entropy of the join of the minimal sufficient statistic of each variable
+    about the others.
 
     Parameters
     ----------
@@ -26,6 +29,7 @@ def joint_mss_entropy(dist, rvs=None, crvs=None, rv_mode=None):
         defaults to 'indices'.
 
     """
+    rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode)
     d = insert_joint_mss(dist, -1, rvs, rv_mode)
 
     M = entropy(d, [d.outcome_length() - 1], crvs, rv_mode)

diff --git a/...variate/tests/test_binding_information.py → ...iate/tests/test_dual_total_correlation.py b/...variate/tests/test_binding_information.py → ...iate/tests/test_dual_total_correlation.py
@@ -1,14 +1,14 @@
 """
-Tests for dit.multivariate.binding_information.
+Tests for dit.multivariate.dual_total_correlation.
 """
 
 from __future__ import division
 
 from nose.tools import assert_almost_equal, assert_raises
 
 from dit import Distribution as D, ScalarDistribution as SD
-from dit.multivariate import (binding_information as B,
-                            residual_entropy as R)
+from dit.multivariate import (dual_total_correlation as B,
+                              residual_entropy as R)
 from dit.shannon import (entropy as H,
                          mutual_information as I)
 from dit.exceptions import ditException