
Idep (#127)
* add idep

* remove merge conflict leftover

* add some more distributions

* expose maxiters to idep

* updates

* start idep docs

* special case bivariate forms of ibroja, idep

* add features to dependencydecomposition, and then use them

* finish off idep

* add a test for what seems to be a tricky broja optimization

* adjust sensitivity of test

* soften a few more tests

* clean up idep variants

* fix typo
Autoplectic committed Sep 20, 2017
1 parent c83e762 commit 46324d0
Showing 11 changed files with 474 additions and 70 deletions.
51 changes: 24 additions & 27 deletions dit/algorithms/scipy_optimizers.py
@@ -189,7 +189,7 @@ def co_information(self, x):
spmf = [ pmf.sum(axis=subset, keepdims=True)**((-1)**(n - len(subset))) for subset in self._subvars ]
return np.nansum(pmf * np.log2(np.prod(spmf)))

-def optimize(self, x0=None, bounds=None, nhops=10, polish=1e-10):
+def optimize(self, x0=None, bounds=None, nhops=10, polish=1e-10, maxiters=1000):
"""
Perform the optimization. Dispatches to the appropriate backend.
@@ -224,7 +224,7 @@ def optimize(self, x0=None, bounds=None, nhops=10, polish=1e-10):
'constraints': self.constraints,
'tol': None,
'callback': None,
-'options': {'maxiter': 1000,
+'options': {'maxiter': maxiters,
'ftol': 1e-7,
'eps': 1.4901161193847656e-08,
},
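For context, maxiters is forwarded straight into SLSQP's 'maxiter' option. A minimal standalone sketch of the same options shape, on a toy objective (not dit code):

from scipy.optimize import minimize

# toy quadratic objective; SLSQP honors the same 'options' keys as above
res = minimize(fun=lambda x: (x ** 2).sum(),
               x0=[1.0, -2.0],
               method='SLSQP',
               options={'maxiter': 1000, 'ftol': 1e-7})
print(res.x)  # near [0, 0]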
@@ -356,16 +356,16 @@ def _optimization_backend(self, x0, kwargs, nhops):
frontend and the SLSQP algorithm because it is one of the few generic
optimizers which can work with both bounds and constraints.
"""
-res = minimize(fun=self.objective,
-               x0=x0,
-               **kwargs
-               )
+self.res = minimize(fun=self.objective,
+                    x0=x0,
+                    **kwargs
+                    )

-if not res.success: # pragma: no cover
-msg = "Optimization failed: {}".format(res.message)
+if not self.res.success: # pragma: no cover
+msg = "Optimization failed: {}".format(self.res.message)
raise ditException(msg)

-self._optima = res.x
+self._optima = self.res.x
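(Keeping the SciPy OptimizeResult on self.res, rather than in a local, lets callers inspect convergence afterwards, e.g. res.nit, res.message, res.success.)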


class BaseNonConvexOptimizer(BaseOptimizer):
@@ -400,17 +400,17 @@ def _optimization_backend(self, x0, kwargs, nhops):
"""
self._callback = BasinHoppingCallBack(kwargs['constraints'], None)

-res = basinhopping(func=self.objective,
-                   x0=x0,
-                   minimizer_kwargs=kwargs,
-                   niter=nhops,
-                   callback=self._callback,
-                   accept_test=accept_test,
-                   )
+self.res = basinhopping(func=self.objective,
+                        x0=x0,
+                        minimizer_kwargs=kwargs,
+                        niter=nhops,
+                        callback=self._callback,
+                        accept_test=accept_test,
+                        )

-success, msg = basinhop_status(res)
+success, msg = basinhop_status(self.res)
if success:
-self._optima = res.x
+self._optima = self.res.x
else: # pragma: no cover
minimum = self._callback.minimum()
if minimum is not None:
@@ -486,11 +486,8 @@ def objective(self, x):
"""
return self.co_information(x)

-@removals.removed_class("BROJAOptimizer",
-                        replacement="dit.pid.ibroja.BROJAOptimizer",
-                        message="Please use the dit.pid module instead.",
-                        version='1.0.0.dev8')
-class BROJAOptimizer(BaseConvexOptimizer, MaxCoInfoOptimizer):
+class BROJABivariateOptimizer(MaxCoInfoOptimizer):
"""
An optimizer for constructing the maximum co-information distribution
consistent with (source, target) marginals of the given distribution.
@@ -524,13 +521,13 @@ def __init__(self, dist, sources, target, rv_mode=None):
consulted, which defaults to 'indices'.
"""
dist = broja_prepare_dist(dist, sources, target, rv_mode)
-super(BROJAOptimizer, self).__init__(dist, [[0, 2], [1, 2]])
+super(BROJABivariateOptimizer, self).__init__(dist, [[0, 2], [1, 2]])

extra_free = broja_extra_constraints(self.dist, 2).free
self._free = list(sorted(set(self._free) & set(extra_free)))


-def maxent_dist(dist, rvs, rv_mode=None, x0=None, sparse=True):
+def maxent_dist(dist, rvs, rv_mode=None, x0=None, sparse=True, maxiters=1000):
"""
Return the maximum entropy distribution consistent with the marginals from
`dist` specified in `rvs`.
@@ -559,7 +556,7 @@ def maxent_dist(dist, rvs, rv_mode=None, x0=None, sparse=True):
The maximum entropy distribution.
"""
meo = MaxEntOptimizer(dist, rvs, rv_mode)
-meo.optimize(x0=x0)
+meo.optimize(x0=x0, maxiters=maxiters)
dist = meo.construct_dist(sparse=sparse)
return dist
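With maxiters threaded through to the optimizer, a caller can now raise the iteration budget directly. An illustrative use on the XOR distribution:

import dit
from dit.algorithms.scipy_optimizers import maxent_dist

xor = dit.Distribution(['000', '011', '101', '110'], [1 / 4] * 4)
# maximum-entropy distribution matching all pairwise marginals
me = maxent_dist(xor, [[0, 1], [0, 2], [1, 2]], maxiters=2000)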

@@ -658,7 +655,7 @@ def pid_broja(dist, sources, target, rv_mode=None, return_opt=False):
The distribution resulting from the optimization. Note that var [0]
is sources[0], var [1] is sources[1], and var [2] is target.
"""
-broja = BROJAOptimizer(dist, sources, target, rv_mode)
+broja = BROJABivariateOptimizer(dist, sources, target, rv_mode)
broja.optimize()
opt_dist = broja.construct_dist()
r = -broja.objective(broja._optima)
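A hypothetical call matching the signature above, using the docstring's index convention (vars [0] and [1] are the sources, [2] the target); the exact return shape follows the function body:

from dit.pid.distributions import bivariate
from dit.algorithms.scipy_optimizers import pid_broja

# sources as lists of indices, target as a list of indices
r = pid_broja(bivariate.and_, [[0], [1]], [2])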
1 change: 1 addition & 0 deletions dit/pid/__init__.py
@@ -8,4 +8,5 @@
from .immi import PID_MMI
from .ibroja import PID_BROJA
from .iccs import PID_CCS
+from .idep import PID_dep
from .idownarrow import PID_downarrow
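Assuming PID_dep follows the same constructor as its siblings here, the new measure is used like any other:

from dit.pid import PID_BROJA, PID_dep
from dit.pid.distributions import bivariate

# printing a PID object renders its partial-information table
print(PID_dep(bivariate.and_))
print(PID_BROJA(bivariate.and_))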
7 changes: 6 additions & 1 deletion dit/pid/distributions/__init__.py
@@ -19,6 +19,7 @@
'and': bivariate.and_,
'reduced or': bivariate.reduced_or,
'sum': bivariate.sum_,
+'f1': bivariate.f1,
'jeff': bivariate.jeff_2,
'wb 1': bivariate.wb_1,
'wb 2': bivariate.wb_2,
@@ -27,7 +28,10 @@
'rdn xor': bivariate.rdn_xor,
'prob 1': bivariate.prob_1,
'prob 2': bivariate.prob_2,
-'gband': bivariate.gband,
+'gband': bivariate.gband,
+'boom': bivariate.boom,
+'not two': bivariate.not_two,
+'pointwise unq': bivariate.pwu,
}

trivariates = {'uniform': trivariate.uni,
@@ -42,4 +46,5 @@
'shared xor': trivariate.shared_xor,
'xor shared': trivariate.xor_shared,
'giant xor': trivariate.giant_xor,
+'dbl xor': trivariate.dblxor,
}
16 changes: 15 additions & 1 deletion dit/pid/distributions/bivariate.py
@@ -33,10 +33,13 @@
# output is the logical and of the inputs
and_ = uniform(['000', '010', '100', '111'])

+# f1 from 'extractable information'
+f1 = uniform(['002', '010', '102', '111'])

# jeff's generalization of rdn
jeff_2 = jeff(2)

-# reduced or, from ince.
+# reduced or, from lizier by way of ince.
reduced_or = Distribution(['000', '011', '101'], [1/2, 1/4, 1/4])

# output is sum of inputs
@@ -69,3 +72,14 @@
# gband, some measures demonstrate subadditivity of redundancy with this distribution
gband = uniform(['000', '010', '100', '111', '222', '232', '322', '333'])

+# min == proj, broja == ccs, dep stands alone
+boom = uniform(['001', '002', '020', '121', '202', '212'])
+
+# contains holistic synergy
+not_two = uniform(['000', '001', '010', '100', '111'])
+
+# the prototype for the redundancy in 'and' and 'sum'
+dup = uniform(['000', '001', '111', '112'])
+
+# pointwise unique
+pwu = uniform(['011', '101', '022', '202'])
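These definitions rely on a uniform() helper; a minimal sketch of it, assuming the module's convention of equiprobable outcomes over the listed strings:

from dit import Distribution

def uniform(outcomes):
    # equal probability on each listed outcome string
    return Distribution(outcomes, [1 / len(outcomes)] * len(outcomes))

pwu = uniform(['011', '101', '022', '202'])  # each outcome has probability 1/4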
4 changes: 4 additions & 0 deletions dit/pid/distributions/trivariate.py
@@ -41,8 +41,12 @@
giant_xor = uniform(['0000', '0120', '0231', '0311', '1031', '1111', '1200', '1320',
'2010', '2130', '2221', '2301', '3021', '3101', '3210', '3330'])

+xor_giant = uniform(['0000', '0211', '1021', '1230', '2101', '2310', '3120', '3331'])

# difficult to interpret
jeff_3 = jeff(3)

# anddup, from griffith
anddup = uniform(['0000', '0100', '1010', '1111'])

+dblxor = uniform(['0000', '0011', '0103', '0112', '1002', '1013', '1101', '1110'])
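A quick spot-check of one of the new distributions; coinformation with no rvs argument spans all four variables (the value is computed, not asserted):

from dit import Distribution
from dit.multivariate import coinformation

dblxor = Distribution(['0000', '0011', '0103', '0112',
                       '1002', '1013', '1101', '1110'], [1 / 8] * 8)
print(coinformation(dblxor))  # I(X0 : X1 : X2 : X3)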
58 changes: 37 additions & 21 deletions dit/pid/ibroja.py
@@ -8,7 +8,7 @@

from .pid import BaseUniquePID

-from ..algorithms.scipy_optimizers import BaseConvexOptimizer
+from ..algorithms.scipy_optimizers import BaseConvexOptimizer, BROJABivariateOptimizer
from ..multivariate import coinformation


@@ -19,32 +19,31 @@ class BROJAOptimizer(BaseConvexOptimizer):
maximizing the coinformation.
"""

-def __init__(self, dist, inputs, output, rv_mode=None):
+def __init__(self, dist, input, others, output, rv_mode=None):
"""
Initialize the optimizer.
Parameters
----------
dist : Distribution
The distribution to base the optimization on.
-inputs : iterable of iterables
+input : iterable
Variables to treat as inputs.
+others : iterable of iterables
+The other input variables.
output : iterable
The output variable.
rv_mode : bool
Unused, provided for compatibility with parent class.
"""
-self._inputs = inputs
-self._output = output
-self._var_map = {var: i for i, var in enumerate(inputs + (output,))}
-dist = dist.coalesce(inputs + (output,))
-constraints = [i + dist.rvs[-1] for i in dist.rvs[:-1]]
+dist = dist.coalesce((input,) + (sum(others, ()),) + (output,))
+constraints = [[0, 2], [1, 2]]
super(BROJAOptimizer, self).__init__(dist, constraints)
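(The fixed constraints [[0, 2], [1, 2]] pin the (input, output) and (others, output) marginals: after the coalesce, index 0 is the single input of interest, index 1 is all remaining inputs merged into one variable, and index 2 is the output.)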


def objective(self, x):
"""
-Minimize the portion of I(inputs:output) that is not I(input:output|others).
+Minimize I(input:output|others).
Parameters
----------
@@ -59,19 +58,28 @@ def objective(self, x):
pmf = self._expand(x).reshape(self._shape)
h_total = -np.nansum(pmf * np.log2(pmf))

-inputs = tuple(range(len(self._shape) - 1))
-p_output = pmf.sum(axis=inputs)
+p_output = pmf.sum(axis=(0, 1))
h_output = -np.nansum(p_output * np.log2(p_output))

-input_pmf = pmf.sum(axis=-1)
+input_pmf = pmf.sum(axis=2)
h_input = -np.nansum(input_pmf * np.log2(input_pmf))

mi = h_input + h_output - h_total

-return mi
+reduced_pmf = pmf.sum(axis=0)
+h_reduced = -np.nansum(reduced_pmf * np.log2(reduced_pmf))
+
+others_pmf = pmf.sum(axis=(0, 2))
+h_others = -np.nansum(others_pmf * np.log2(others_pmf))
+
+omi = h_others + h_output - h_reduced
+
+cmi = mi - omi
+
+return cmi
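The rewritten objective is the chain rule I(input : output | others) = I(input, others : output) - I(others : output), computed from marginal entropies. A standalone numpy check of the identity as coded above (toy pmf, axes ordered (input, others, output)):

import numpy as np

pmf = np.random.dirichlet(np.ones(8)).reshape((2, 2, 2))

def H(p):
    # Shannon entropy in bits; nansum treats 0 * log2(0) as 0
    return -np.nansum(p * np.log2(p))

mi = H(pmf.sum(axis=2)) + H(pmf.sum(axis=(0, 1))) - H(pmf)                    # I(input, others : output)
omi = H(pmf.sum(axis=(0, 2))) + H(pmf.sum(axis=(0, 1))) - H(pmf.sum(axis=0))  # I(others : output)
cmi = mi - omi                                                                # I(input : output | others)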


-def i_broja(d, inputs, output):
+def i_broja(d, inputs, output, maxiters=1000):
"""
This computes unique information as min{I(input : output | other_inputs)} over the space of distributions
which match the input-output marginals.
@@ -91,13 +99,21 @@ def i_broja(d, inputs, output):
The value of I_broja for each individual input.
"""
uniques = {}
-for input_ in inputs: # fix this to do simpler, and independent optimizations
-    others = sum([i for i in inputs if i != input_], ())
-    dm = d.coalesce([input_, others, output])
-    broja = BROJAOptimizer(dm, ((0,), (1,)), (2,))
-    broja.optimize()
-    d_opt = broja.construct_dist()
-    uniques[input_] = coinformation(d_opt, [[0], [2]], [1])
+if len(inputs) == 2:
+    broja = BROJABivariateOptimizer(d, list(inputs), output)
+    broja.optimize(maxiters=maxiters)
+    opt_dist = broja.construct_dist()
+    uniques[inputs[0]] = coinformation(opt_dist, [[0], [2]], [1])
+    uniques[inputs[1]] = coinformation(opt_dist, [[1], [2]], [0])
+else:
+    for input_ in inputs:
+        others = sum([i for i in inputs if i != input_], ())
+        dm = d.coalesce([input_, others, output])
+        broja = BROJAOptimizer(dm, (0,), ((1,),), (2,))
+        broja.optimize(maxiters=maxiters)
+        d_opt = broja.construct_dist()
+        uniques[input_] = coinformation(d_opt, [[0], [2]], [1])

return uniques
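A hypothetical direct call exercising the trivariate (else) branch, assuming the distributions subpackage exposes the trivariate module as the dicts above suggest:

from dit.pid.distributions import trivariate
from dit.pid.ibroja import i_broja

# three inputs, one output; groupings are tuples of variable indices
u = i_broja(trivariate.anddup, ((0,), (1,), (2,)), (3,))
print(u)  # one unique-information value per input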


