Update sampling from DPP(L) with L_dual kernel

guilgautier · Jan 7, 2020 · fb0aae9 · fb0aae9
1 parent 25e20f1
commit fb0aae9
Show file tree

Hide file tree

Showing 4 changed files with 79 additions and 123 deletions.
diff --git a/docs/finite_dpps/exact_sampling.rst b/docs/finite_dpps/exact_sampling.rst
@@ -368,20 +368,19 @@ In practice
 
 		[[3, 1, 0, 4], [9, 6], [4, 1, 3, 0], [7, 0, 6, 4], [5, 0, 7], [4, 0, 2], [5, 3, 8, 4], [0, 5, 2], [7, 0, 2], [6, 0, 3]]
 
-- Sampling a :math:`\operatorname{DPP}(\mathbf{L})` for which each item is represented by a :math:`d\leq N` dimensional feature vector, all stored in a _feature_ matrix :math:`\Phi \in \mathbb{R}^{d\times N}`, so that :math:`\mathbf{L}=\Phi^{\top} \Phi \succeq 0_N`, can be done by following
+- Sampling a :math:`\operatorname{DPP}(\mathbf{L})` for which each item is represented by a :math:`d\leq N` dimensional feature vector, all stored in a *feature matrix* :math:`\Phi \in \mathbb{R}^{d\times N}`, so that :math:`\mathbf{L}=\Phi^{\top} \Phi \succeq 0_N`, can be done by following
 
-  	**Step** 0. compute the so-called *dual* kernel :math:`\tilde{L}=\Phi \Phi^{\dagger}\in \mathbb{R}^{d\times}` and eigendecompose it :math:`\tilde{\mathbf{L}} = W \Delta W^{\top}`.
-  	This corresponds to a cost of order :math:`\mathcal{O}(Nd^2 + d^3)`.
+  	**Step** 0. compute the so-called *dual* kernel :math:`\tilde{L}=\Phi \Phi^{\dagger}\in \mathbb{R}^{d\times}`, eigendecompose it :math:`\tilde{\mathbf{L}} = W \Delta W^{\top}` and recover the eigenvectors of :math:`\mathbf{L}` as :math:`V=\Phi^{\top}W \Delta^{-\frac{1}{2}}`
+  	This corresponds to a cost of order :math:`\mathcal{O}(Nd^2 + d^3 + d^2 + Nd^2)`.
 
- 	**Step** :ref:`1. <finite_dpps_exact_sampling_spectral_method_step_1>` is adapted to: draw independent Bernoulli random variables :math:`B_i \sim \operatorname{\mathcal{B}er}(\delta_i)` for :math:`i=1,\dots, d` and collect :math:`\mathcal{B}=\left\{ i ~;~ B_i=1 \right\}`
+ 	**Step** :ref:`1. <finite_dpps_exact_sampling_spectral_method_step_1>` is adapted to: draw independent Bernoulli random variables :math:`B_i \sim \operatorname{\mathcal{B}er}(\frac{\delta_i}{1+\delta_i})` for :math:`i=1,\dots, d` and collect :math:`\mathcal{B}=\left\{ i ~;~ B_i=1 \right\}`
 
-	**Step** :ref:`2. <finite_dpps_exact_sampling_spectral_method_step_2>` is adapted to: sample from the **projection** DPP with correlation kernel defined by its eigenvectors :math:`\Phi^{\top} W_{:,\mathcal{B}} \Delta_{\mathcal{B}}^{-1/2}`.
-	The computation of the eigenvectors requires :math:`\mathcal{O}(Nd|\mathcal{B}|)`.
+	**Step** :ref:`2. <finite_dpps_exact_sampling_spectral_method_step_2>` is adapted to: sample from the **projection** DPP with correlation kernel defined by its eigenvectors :math:`V_{:,\mathcal{B}} = \left[\Phi^{\top} W \Delta^{-1/2}\right]_{:,\mathcal{B}}`.
 
 	.. important::
 
 		Step 0. must be performed once and for all in :math:`\mathcal{O}(Nd^2 + d^3)`.
-		Then the average cost of getting one sample by applying Steps 1. and 2. is :math:`\mathcal{O}(Nd\mathbb{E}\left[|\mathcal{X}|\right] + N \mathbb{E}\left[|\mathcal{X}|\right]^2)`, where :math:`\mathbb{E}\left[|\mathcal{X}|\right]=\operatorname{trace}(\mathbf{\tilde{L}(I+\tilde{L})^{-1}})=\sum_{i=1}^{d} \frac{\delta_i}{1+\delta_i}`
+		Then the average cost of getting one sample by applying Steps 1. and 2. is :math:`\mathcal{O}(N \mathbb{E}\left[|\mathcal{X}|\right]^2)`, where :math:`\mathbb{E}\left[|\mathcal{X}|\right]=\operatorname{trace}(\mathbf{\tilde{L}(I+\tilde{L})^{-1}})=\sum_{i=1}^{d} \frac{\delta_i}{1+\delta_i}\leq d`
 
 	.. seealso::
 

diff --git a/dppy/exact_sampling.py b/dppy/exact_sampling.py
@@ -361,8 +361,8 @@ def dpp_sampler_generic_kernel(K, random_state=None):
 # Phase 1: subsample eigenvectors by drawing independent Bernoulli variables with parameter the eigenvalues of the correlation kernel K.
 def dpp_eig_vecs_selector(ber_params, eig_vecs,
                           random_state=None):
-    """ Phase 1 of exact sampling procedure. Subsample eigenvectors :math:`V` of the initial kernel (correlation :math:`K`, resp. likelihood :math:`L`) to build a projection DPP with kernel :math:`V V^{\\top}` from which sampling is easy.
-    The selection is made based on a realization of Bernoulli variables with parameters related to the eigenvalues of :math:`K`, resp. :math:`L`.
+    """ Phase 1 of exact sampling procedure. Subsample eigenvectors :math:`V` of the initial kernel (correlation :math:`K`, resp. likelihood :math:`L`) to build a projection DPP with kernel :math:`U U^{\\top}` from which sampling is easy.
+    The selection is made based on a realization of Bernoulli variables with parameters to the eigenvalues of :math:`K`.
 
     :param ber_params:
         Parameters of Bernoulli variables
@@ -391,52 +391,6 @@ def dpp_eig_vecs_selector(ber_params, eig_vecs,
 
     return eig_vecs[:, ind_sel]
 
-
-def dpp_eig_vecs_selector_L_dual(eig_vals, eig_vecs, gram_factor,
-                                 random_state=None):
-    """ Subsample eigenvectors :math:`V` of likelihood kernel :math:`L=\\Phi^{\\top} \\Phi` based on the eigendecomposition dual kernel :math:`L'=\\Phi \\Phi^{\\top}`. Note that :math:`L'` and :math:`L'` share the same nonzero eigenvalues.
-
-    :param eig_vals:
-        Collection of eigenvalues of :math:`L_dual` kernel.
-    :type eig_vals:
-        list, array_like
-
-    :param eig_vecs:
-        Collection of eigenvectors of :math:`L_dual` kernel.
-    :type eig_vecs:
-        array_like
-
-    :param gram_factor:
-        Feature matrix :math:`\\Phi`
-    :type gram_factor:
-        array_like
-
-    :return:
-        selected eigenvectors
-    :rtype:
-        array_like
-
-    .. see also::
-
-        Phase 1:
-
-        - :func:`dpp_eig_vecs_selector <dpp_eig_vecs_selector>`
-
-        Phase 2:
-
-        - :func:`proj_dpp_sampler_eig_GS <proj_dpp_sampler_eig_GS>`
-        - :func:`proj_dpp_sampler_eig_GS_bis <proj_dpp_sampler_eig_GS_bis>`
-        - :func:`proj_dpp_sampler_eig_KuTa12 <proj_dpp_sampler_eig_KuTa12>`
-    """
-
-    rng = check_random_state(random_state)
-
-    # Realisation of Bernoulli random variables with params eig_vals
-    ind_sel = rng.rand(eig_vals.size) < eig_vals / (1.0 + eig_vals)
-
-    return gram_factor.T.dot(eig_vecs[:, ind_sel] / np.sqrt(eig_vals[ind_sel]))
-
-
 # Phase 2:
 # Sample projection kernel VV.T where V are the eigvecs selected in Phase 1.
 def proj_dpp_sampler_eig(eig_vecs, mode='GS', size=None,
@@ -448,7 +402,6 @@ def proj_dpp_sampler_eig(eig_vecs, mode='GS', size=None,
         Phase 1:
 
         - :func:`dpp_eig_vecs_selector <dpp_eig_vecs_selector>`
-        - :func:`dpp_eig_vecs_selector_gram_factor <dpp_eig_vecs_selector_gram_factor>`
 
         Phase 2:
 

diff --git a/dppy/finite_dpps.py b/dppy/finite_dpps.py
@@ -24,7 +24,6 @@
                                  proj_dpp_sampler_eig,
                                  dpp_vfx_sampler,
                                  dpp_eig_vecs_selector,
-                                 dpp_eig_vecs_selector_L_dual,
                                  k_dpp_vfx_sampler,
                                  k_dpp_eig_vecs_selector,
                                  elementary_symmetric_polynomials)
@@ -131,7 +130,7 @@ def __init__(self, kernel_type, projection=False, **params):
         self.A_zono = is_full_row_rank(params.get('A_zono', None))
 
         # Attributes relative to L likelihood kernel:
-        # L, L_eig_vals, L_eig_vecs, L_gram_factor, L_dual, L_dual_eig_vals, L_dual_eig_vecs
+        # L, L_eig_vals, L_eig_vecs, L_gram_factor, L_dual
         self.L = is_symmetric(params.get('L', None))
         if self.projection:
             self.L = is_projection(self.L)
@@ -147,8 +146,6 @@ def __init__(self, kernel_type, projection=False, **params):
         # L' "dual" likelihood kernel, L' = Phi Phi.T, Phi = L_gram_factor
         self.L_gram_factor = params.get('L_gram_factor', None)
         self.L_dual = None
-        self.L_dual_eig_vals = None
-        self.L_dual_eig_vecs = None
 
         if self.L_gram_factor is not None:
             Phi = self.L_gram_factor
@@ -366,34 +363,39 @@ def sample_exact(self, mode='GS', **params):
             sampl = proj_dpp_sampler_eig(V, self.sampling_mode,
                                          random_state=rng)
 
-        elif self.L_dual_eig_vals is not None:
-            # Phase 1
-            V = dpp_eig_vecs_selector_L_dual(self.L_dual_eig_vals,
-                                             self.L_dual_eig_vecs,
-                                             self.L_gram_factor,
-                                             random_state=rng)
-            # Phase 2
-            sampl = proj_dpp_sampler_eig(V, self.sampling_mode,
-                                         random_state=rng)
+        # elif self.L_dual_eig_vals is not None:
+        #     # Phase 1
+        #     V = dpp_eig_vecs_selector_L_dual(self.L_dual_eig_vals,
+        #                                      self.L_dual_eig_vecs,
+        #                                      self.L_gram_factor,
+        #                                      random_state=rng)
+        #     # Phase 2
+        #     sampl = proj_dpp_sampler_eig(V, self.sampling_mode,
+        #                                  random_state=rng)
+        #
 
         elif self.L_eig_vals is not None:
             self.K_eig_vals = self.L_eig_vals / (1.0 + self.L_eig_vals)
             return self.sample_exact(mode=self.sampling_mode,
                                      random_state=rng)
 
+        elif self.L_dual is not None:
+            # L_dual = Phi Phi.T = W Theta W.T
+            # L = Phi.T Phi = V Gamma V
+            # implies Gamma = Theta and V = Phi.T W Theta^{-1/2}
+            self.L_eig_vals, L_dual_eig_vecs = la.eigh(self.L_dual)
+            self.L_eig_vals = is_geq_0(self.L_eig_vals)
+            self.eig_vecs =self.L_gram_factor.T.dot(L_dual_eig_vecs
+                                                    / np.sqrt(self.L_eig_vals))
+            return self.sample_exact(mode=self.sampling_mode,
+                                     random_state=rng)
+
         # If DPP defined via projection correlation kernel K
         # no eigendecomposition required
         elif self.K is not None and self.projection:
             sampl = proj_dpp_sampler_kernel(self.K, self.sampling_mode,
                                             random_state=rng)
 
-        elif self.L_dual is not None:
-            self.L_dual_eig_vals, self.L_dual_eig_vecs =\
-                la.eigh(self.L_dual)
-            self.L_dual_eig_vals = is_geq_0(self.L_dual_eig_vals)
-            return self.sample_exact(mode=self.sampling_mode,
-                                     random_state=rng)
-
         elif self.K is not None:
             self.K_eig_vals, self.eig_vecs = la.eigh(self.K)
             self.K_eig_vals = is_in_01(self.K_eig_vals)
@@ -585,15 +587,6 @@ def sample_exact_k_dpp(self, size, mode='GS', **params):
             sampl = proj_dpp_sampler_eig(V, self.sampling_mode,
                                          random_state=rng)
 
-        elif self.L_dual_eig_vals is not None:
-            # There is
-            self.L_eig_vals = self.L_dual_eig_vals
-            self.eig_vecs =\
-                self.L_gram_factor.T.dot(
-                    self.L_dual_eig_vecs / np.sqrt(self.L_dual_eig_vals))
-            return self.sample_exact_k_dpp(size, self.sampling_mode,
-                                           random_state=rng)
-
         elif self.K_eig_vals is not None:
             np.seterr(divide='raise')
             self.L_eig_vals = self.K_eig_vals / (1.0 - self.K_eig_vals)
@@ -602,9 +595,19 @@ def sample_exact_k_dpp(self, size, mode='GS', **params):
 
         # Otherwise eigendecomposition is necessary
         elif self.L_dual is not None:
-            self.L_dual_eig_vals, self.L_dual_eig_vecs =\
-                la.eigh(self.L_dual)
-            self.L_dual_eig_vals = is_geq_0(self.L_dual_eig_vals)
+            # L_dual = Phi Phi.T = W Theta W.T
+            # L = Phi.T Phi = V Gamma V.T
+            # implies Gamma = Theta and V = Phi.T W Theta^{-1/2}
+            self.L_eig_vals, L_dual_eig_vecs = la.eigh(self.L_dual)
+            self.L_eig_vals = is_geq_0(self.L_eig_vals)
+            self.eig_vecs =self.L_gram_factor.T.dot(L_dual_eig_vecs
+                                                    / np.sqrt(self.L_eig_vals))
+            return self.sample_exact_k_dpp(size, mode=self.sampling_mode,
+                                           random_state=rng)
+
+        elif self.L is not None:
+            self.L_eig_vals, self.eig_vecs = la.eigh(self.L)
+            self.L_eig_vals = is_geq_0(self.L_eig_vals)
             return self.sample_exact_k_dpp(size, self.sampling_mode,
                                            random_state=rng)
 
@@ -614,12 +617,6 @@ def sample_exact_k_dpp(self, size, mode='GS', **params):
             return self.sample_exact_k_dpp(size, self.sampling_mode,
                                            random_state=rng)
 
-        elif self.L is not None:
-            self.L_eig_vals, self.eig_vecs = la.eigh(self.L)
-            self.L_eig_vals = is_geq_0(self.L_eig_vals)
-            return self.sample_exact_k_dpp(size, self.sampling_mode,
-                                           random_state=rng)
-
         elif self.eval_L is not None and self.X_data is not None:
             # In case mode!='vfx'
             self.compute_L()

diff --git a/tests/test_finite_dpps_samplers.py b/tests/test_finite_dpps_samplers.py
@@ -45,51 +45,58 @@ def singleton_adequation(dpp, samples, tol=0.05):
         """Perform chi-square test to check that
         P[{i} C X] = K_ii
         """
-        dpp.compute_K()
-        N = len(dpp.K)
 
-        marginal_th = np.diag(dpp.K) / np.trace(dpp.K)
+        if dpp.size_k_dpp:
+            return True, 'We do not check inclusion probabilities for k-DPPs'
+        else:
+            dpp.compute_K()
+            N = len(dpp.K)
+
+            marginal_th = np.diag(dpp.K) / np.trace(dpp.K)
 
-        samples_as_singletons = list(chain.from_iterable(samples))
-        marginal_emp, _ = np.histogram(samples_as_singletons,
-                                       bins=range(N + 1),
-                                       density=True)
+            samples_as_singletons = list(chain.from_iterable(samples))
+            marginal_emp, _ = np.histogram(samples_as_singletons,
+                                           bins=range(N + 1),
+                                           density=True)
 
-        _, pval = chisquare(f_obs=marginal_emp, f_exp=marginal_th)
+            _, pval = chisquare(f_obs=marginal_emp, f_exp=marginal_th)
 
-        msg = 'pval = {}, emp = {}, th = {}'.format(pval,
-                                                    marginal_emp,
-                                                    marginal_th)
+            msg = 'pval = {}, emp = {}, th = {}'.format(pval,
+                                                        marginal_emp,
+                                                        marginal_th)
 
-        return pval > tol, msg
+            return pval > tol, msg
 
     @staticmethod
     def doubleton_adequation(dpp, samples, tol=0.05):
         """Perform chi-square test to check that
         P[{i, j} C X] = det [[K_ii, K_ij], [K_ji, K_jj]]
         """
-        samples = list(map(set, samples))
-        dpp.compute_K()
-        N = len(dpp.K)
+        if dpp.size_k_dpp:
+            return True, 'We do not check inclusion probabilities for k-DPPs'
+        else:
+            samples = list(map(set, samples))
+            dpp.compute_K()
+            N = len(dpp.K)
 
-        nb_doubletons = 10
-        doubletons = [set(rndm.choice(N, size=2, replace=False))
-                      for _ in range(nb_doubletons)]
+            nb_doubletons = 10
+            doubletons = [set(rndm.choice(N, size=2, replace=False))
+                          for _ in range(nb_doubletons)]
 
-        # det [[K_ii, K_ij], [K_ji, K_jj]]
-        marginal_th = [det_ST(dpp.K, list(d)) for d in doubletons]
+            # det [[K_ii, K_ij], [K_ji, K_jj]]
+            marginal_th = [det_ST(dpp.K, list(d)) for d in doubletons]
 
-        counts = [sum([doubl.issubset(sampl) for sampl in samples])
-                  for doubl in doubletons]
-        marginal_emp = np.array(counts) / len(samples)
+            counts = [sum([doubl.issubset(sampl) for sampl in samples])
+                      for doubl in doubletons]
+            marginal_emp = np.array(counts) / len(samples)
 
-        _, pval = chisquare(f_obs=marginal_emp, f_exp=marginal_th)
+            _, pval = chisquare(f_obs=marginal_emp, f_exp=marginal_th)
 
-        msg = 'pval = {}, emp = {}, th = {}'.format(pval,
-                                                    marginal_emp,
-                                                    marginal_th)
+            msg = 'pval = {}, emp = {}, th = {}'.format(pval,
+                                                        marginal_emp,
+                                                        marginal_th)
 
-        return pval > tol, msg
+            return pval > tol, msg
 
     @staticmethod
     def uniqueness_of_items(dpp, samples):
@@ -311,7 +318,7 @@ def test_dpp_adequation_with_non_projection_likelihood_kernel(self):
              (False,
                 {'L_eig_dec': (self.e_vals_geq_0, self.e_vecs)}),
              (False,
-                {'L_gram_factor': self.phi})]
+                {'L_gram_factor': self.phi})]  # L_gram_factor to test L_dual
 
         k = self.rank // 2