Merge pull request #65 from giotto-ai/main

Create v0.2.2
giotto-ai · Aug 16, 2022 · c5465b7 · c5465b7
2 parents fb0fcef + ab0454d
commit c5465b7
Show file tree

Hide file tree

Showing 10 changed files with 265 additions and 132 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ubuntu-latest, windows-latest, macos-latest]
-        python-version: [3.6, 3.7, 3.8, 3.9, '3.10']
+        python-version: [3.7, 3.8, 3.9, '3.10']
         include:
           - os: ubuntu-latest
             path: ~/.cache/pip

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -16,27 +16,19 @@ jobs:
       - uses: actions/checkout@v2
 
       - name: Build wheels
-        uses: pypa/cibuildwheel@v2.2.2
+        uses: pypa/cibuildwheel@v2.3.1
         env:
           # Specify which Python versions to build wheels
           # https://cibuildwheel.readthedocs.io/en/stable/options/#build-skip
-          CIBW_BUILD: "cp36-* cp37-* cp38-* cp39-*"
+          CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-*"
           # Skip 32 bit architectures, musllinux, and i686
           CIBW_SKIP: "*-win32 *-musllinux_x86_64 *_i686"
           CIBW_BEFORE_BUILD: python -m pip install cmake
           CIBW_TEST_COMMAND: python -m pytest {package}/gph/python/test
           CIBW_TEST_REQUIRES: pytest hypothesis
-
-      # SciPy do not ship manylinux2010 wheels for Python 3.10, so we too build ours for manylinux2014
-      - name: Build wheels for Python 3.10
-        uses: pypa/cibuildwheel@v2.2.2
-        env:
-          CIBW_BUILD: "cp310-*"
-          CIBW_SKIP: "*-win32 *-musllinux_x86_64 *_i686"
-          CIBW_BEFORE_BUILD: python -m pip install cmake
-          CIBW_TEST_COMMAND: python -m pytest {package}/gph/python/test
-          CIBW_TEST_REQUIRES: pytest hypothesis
           CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
+          # Should generate universal2 wheels for CP3.8 -- CP3.10
+          CIBW_ARCHS_MACOS: universal2
 
       - uses: actions/upload-artifact@v2
         name: Upload wheels

diff --git a/RELEASE.rst b/RELEASE.rst
@@ -1,3 +1,34 @@
+Release 0.2.2
+=============
+
+Minor release bringing bug fixes, performance improvements, wheels for Apple Silicon, and EOS for Python 3.6.
+
+Major Features and Improvements
+-------------------------------
+
+- The dense matrix C++ backend has been extended to allow for nonzero vertex weights. This can lead to large speedups when computing weighted Rips filtrations (`#61 <https://github.com/giotto-ai/giotto-ph/pull/61>`_).
+- The binary search routine to find the largest-indexed vertex in a simplex (``get_max_vertex`` in the C++ backend, as in ``Ripser``) has been replaced with a faster floating-point routine in the case of 1-simplices (edges). This still gives exact results for all cases of interest, and can be substantially faster (`#38 <https://github.com/giotto-ai/giotto-ph/pull/38>`_).
+- Wheels for Apple Silicon are now available for Python versions 3.8, 3.9 and 3.10 (`#62 <https://github.com/giotto-ai/giotto-ph/pull/62>`_).
+
+Bug Fixes
+---------
+
+- Bars in the barcode with death at ``numpy.inf`` are now explicitly treated as essential bars instead of finite bars (`#53 <https://github.com/giotto-ai/giotto-ph/pull/53>`_).
+
+Backwards-Incompatible Changes
+------------------------------
+
+- Python 3.6 is no longer supported, and the manylinux standard has been bumped from ``manylinux2010`` to ``manylinux2014`` (`#62 <https://github.com/giotto-ai/giotto-ph/pull/62>`_).
+
+Thanks to our Contributors
+--------------------------
+
+This release contains contributions from many people:
+
+Umberto Lupo and Julian Burella Pérez.
+
+We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions.
+
 Release 0.2.1
 =============
 

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -24,7 +24,7 @@
 author = 'Julian Burella Perez'
 
 # The full version, including alpha/beta/rc tags
-release = '0.2.1'
+release = '0.2.2'
 
 
 # -- General configuration ---------------------------------------------------

diff --git a/gph/_version.py b/gph/_version.py
@@ -19,4 +19,4 @@
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
 
-__version__ = "0.2.1"
+__version__ = "0.2.2"
diff --git a/gph/bindings/ripser_bindings.cpp b/gph/bindings/ripser_bindings.cpp
@@ -68,25 +68,30 @@ PYBIND11_MODULE(gph_ripser, m)
 
     m.def(
         "rips_dm",
-        [](py::array_t<value_t>& D, int N, int modulus, int dim_max,
-           float threshold, int num_threads, bool return_generators) {
+        [](py::array_t<value_t>& D, py::array_t<value_t>& diag, int modulus,
+           int dim_max, float threshold, int num_threads,
+           bool return_generators) {
             // Setup distance matrix and figure out threshold
             auto D_ = static_cast<value_t*>(D.request().ptr);
-            std::vector<value_t> distances(D_, D_ + N);
+            std::vector<value_t> distances(D_, D_ + D.size());
+            auto diag_ = static_cast<value_t*>(diag.request().ptr);
+            std::vector<value_t> diagonal(diag_, diag_ + diag.size());
+
             compressed_lower_distance_matrix dist =
                 compressed_lower_distance_matrix(
-                    compressed_upper_distance_matrix(std::move(distances)));
+                    compressed_upper_distance_matrix(std::move(distances),
+                                                     std::move(diagonal)));
 
             ripserResults res;
             ripser<compressed_lower_distance_matrix> r(
-                std::move(dist), dim_max, threshold, modulus,
-                num_threads, return_generators);
+                std::move(dist), dim_max, threshold, modulus, num_threads,
+                return_generators);
             r.compute_barcodes();
             r.copy_results(res);
             return res;
         },
-        "D"_a, "N"_a, "modulus"_a, "dim_max"_a, "threshold"_a, "num_threads"_a,
-        "return_generators"_a, "ripser distance matrix");
+        "D"_a, "diag"_a, "modulus"_a, "dim_max"_a, "threshold"_a,
+        "num_threads"_a, "return_generators"_a, "ripser distance matrix");
 
     m.def(
         "rips_dm_sparse",
@@ -100,8 +105,7 @@ PYBIND11_MODULE(gph_ripser, m)
             // Setup distance matrix and figure out threshold
             ripser<sparse_distance_matrix> r(
                 sparse_distance_matrix(I_, J_, V_, NEdges, N, threshold),
-                dim_max, threshold, modulus, num_threads,
-                return_generators);
+                dim_max, threshold, modulus, num_threads, return_generators);
             r.compute_barcodes();
 
             ripserResults res;

diff --git a/gph/python/ripser_interface.py b/gph/python/ripser_interface.py
@@ -21,7 +21,7 @@
 from warnings import catch_warnings, simplefilter
 
 import numpy as np
-from scipy.sparse import issparse, csr_matrix, coo_matrix, triu
+from scipy.sparse import issparse, csr_matrix, triu
 from scipy.spatial.distance import squareform
 from sklearn.exceptions import EfficiencyWarning
 from sklearn.metrics.pairwise import pairwise_distances
@@ -34,16 +34,14 @@
 MAX_COEFF_SUPPORTED = gph_ripser.get_max_coefficient_field_supported()
 
 
-def _compute_ph_vr_dense(DParam, maxHomDim, thresh=-1, coeff=2, n_threads=1,
-                         return_generators=False):
+def _compute_ph_vr_dense(DParam, diagonal, maxHomDim, thresh=-1, coeff=2,
+                         n_threads=1, return_generators=False):
     if coeff == 2:
-        ret = gph_ripser.rips_dm(DParam, DParam.shape[0], coeff,
-                                 maxHomDim, thresh, n_threads,
-                                 return_generators)
+        ret = gph_ripser.rips_dm(DParam, diagonal, coeff, maxHomDim, thresh,
+                                 n_threads, return_generators)
     else:
-        ret = gph_ripser_coeff.rips_dm(DParam, DParam.shape[0], coeff,
-                                       maxHomDim, thresh, n_threads,
-                                       return_generators)
+        ret = gph_ripser_coeff.rips_dm(DParam, diagonal, coeff, maxHomDim,
+                                       thresh, n_threads, return_generators)
     return ret
 
 
@@ -108,6 +106,27 @@ def _collapse_coo(row, col, data, thresh):
             np.hstack([data_diag, data]))
 
 
+def _collapse_dense(dm, thresh):
+    """Run edge collapser on off-diagonal data and then reinsert diagonal
+    data if any non-zero value is present."""
+
+    # Use 32-bit float precision here so when diagonal is extracted,
+    # it is still 32-bit in the entire function operations.
+    dm = dm.astype(np.float32)
+
+    row, col, data = \
+        gph_collapser.flag_complex_collapse_edges_dense(dm, thresh)
+
+    data_diag = dm.diagonal()
+    if (data_diag != 0).any():
+        indices = np.arange(data_diag.shape[0])
+        row = np.hstack([indices, row])
+        col = np.hstack([indices, col])
+        data = np.hstack([data_diag, data])
+
+    return row, col, data
+
+
 def _compute_dtm_weights(dm, n_neighbors, weights_r):
     with catch_warnings():
         simplefilter("ignore", category=EfficiencyWarning)
@@ -558,30 +577,19 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
         if weights is not None:
             dm = _compute_weights(dm, weights, weight_params, n_points)
 
-        compute_enclosing_radius = False
-        nonzero_in_diag = (dm.diagonal() != 0).any()
-        if not nonzero_in_diag:
+        apply_user_threshold = thresh != np.inf
+        if not apply_user_threshold:
             # Compute ideal threshold only when a distance matrix is passed
             # as input without specifying any threshold
             # We check if any element and if no entries is present in the
             # diagonal. This allows to have the enclosing radius before
             # calling collapser if computed
-            if thresh == np.inf:
-                thresh = _ideal_thresh(dm, thresh)
-                compute_enclosing_radius = True
-
-        if nonzero_in_diag:
-            # Convert to sparse format, because currently that's the only one
-            # handling nonzero births
-            (row, col) = np.triu_indices_from(dm)
-            data = dm[(row, col)]
-            if collapse_edges:
-                row, col, data = _collapse_coo(row, col, data, thresh)
-        elif collapse_edges:
-            row, col, data = gph_collapser.\
-                flag_complex_collapse_edges_dense(dm.astype(np.float32),
-                                                  thresh)
-        elif not compute_enclosing_radius:
+            thresh = _ideal_thresh(dm, thresh)
+
+        if collapse_edges:
+            row, col, data = _collapse_dense(dm, thresh)
+
+        elif apply_user_threshold:
             # If the user specifies a threshold, we use a sparse representation
             # like Ripser does
             row, col = np.nonzero(dm <= thresh)
@@ -604,10 +612,11 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             return_generators
             )
     else:
-        # Only consider strict upper diagonal
+        # Only consider upper diagonal
+        diagonal = np.diagonal(dm).astype(np.float32)
         DParam = squareform(dm, checks=False).astype(np.float32)
-        res = _compute_ph_vr_dense(DParam, maxdim, thresh, coeff, n_threads,
-                                   return_generators)
+        res = _compute_ph_vr_dense(DParam, diagonal, maxdim, thresh,
+                                   coeff, n_threads, return_generators)
 
     # Unwrap persistence diagrams
     # Barcodes must match the inner type of C++ core filtration value.

diff --git a/gph/python/test/test_ripser.py b/gph/python/test/test_ripser.py
@@ -10,6 +10,12 @@
 from gph import ripser_parallel as ripser
 
 
+def make_dm_symmetric(dm):
+    # Extract strict upper diagonal and make symmetric
+    dm = np.triu(dm.astype(np.float32), k=1)
+    return dm + dm.T
+
+
 @composite
 def get_dense_distance_matrices(draw):
     """Generate 2d dense square arrays of floats, with zero along the
@@ -22,10 +28,7 @@ def get_dense_distance_matrices(draw):
                                      exclude_min=True,
                                      width=32),
                      shape=(shapes, shapes), unique=False))
-    # Extract strict upper diagonal and make symmetric
-    dm = np.triu(dm.astype(np.float32), k=1)
-    dm = dm + dm.T
-    return dm
+    return make_dm_symmetric(dm)
 
 
 @composite
@@ -133,7 +136,7 @@ def test_collapser_with_negative_weights():
     """Test that collapser works as expected when some of the vertex and edge
     weights are negative."""
     n_points = 20
-    dm = np.random.random((n_points, n_points))
+    dm = make_dm_symmetric(np.random.random((n_points, n_points)))
     np.fill_diagonal(dm, -np.random.random(n_points))
     dm -= 0.2
     dm_sparse = coo_matrix(dm)
@@ -294,12 +297,12 @@ def test_gens_order_vertices_higher_dimension():
     one in the reverse colexicographic order used to build the simplexwise
     refinement of the Vietoris-Rips filtration."""
     diamond = np.array(
-        [[0,      1,      100,    1,      1,      1  ],
-         [0,      0,      1,      100,    1,      1  ],
-         [0,      0,      0,      1,      1,      1  ],
-         [0,      0,      0,      0,      1,      1  ],
-         [0,      0,      0,      0,      0,      100],
-         [0,      0,      0,      0,      0,      0  ]],
+        [[0,      1,    100,      1,      1,      1],
+         [0,      0,      1,    100,      1,      1],
+         [0,      0,      0,      1,      1,      1],
+         [0,      0,      0,      0,      1,      1],
+         [0,      0,      0,      0,      0,    100],
+         [0,      0,      0,      0,      0,      0]],
         dtype=np.float64)
 
     diamond += diamond.T
@@ -431,3 +434,45 @@ def test_unsupported_coefficient():
     with pytest.raises(ValueError):
         ripser(X, metric='precomputed',
                coeff=gph_ripser.get_max_coefficient_field_supported()+1)
+
+
+@settings(deadline=500)
+@given(dm_dense=get_dense_distance_matrices())
+def test_non_0_diagonal_internal_representation(dm_dense):
+    """Checks that, when passing a full distance matrix with non-zero values in
+    the diagonal, the result is the same regardless of whether the input is in
+    dense or sparse format."""
+    diagonal = np.random.random(dm_dense.shape[0])
+
+    # Ensure that all entries are bigger than the diagonal
+    dm_dense = dm_dense + 1
+    np.fill_diagonal(dm_dense, diagonal)
+
+    dgms1 = ripser(dm_dense, maxdim=2, metric='precomputed')['dgms']
+    dgms2 = ripser(coo_matrix(dm_dense), maxdim=2,
+                   metric='precomputed')['dgms']
+
+    for bars1, bars2 in zip(dgms1, dgms2):
+        assert_array_equal(bars1, bars2)
+
+
+def test_infinite_deaths_always_essential():
+    """Regression test for issue #37"""
+    diamond_dm = np.array(
+        [[0,      1,      np.inf, 1,      1,      1],
+         [0,      0,      1,      np.inf, 1,      1],
+         [0,      0,      0,      1,      1,      1],
+         [0,      0,      0,      0,      1,      1],
+         [0,      0,      0,      0,      0,      np.inf],
+         [0,      0,      0,      0,      0,      0]],
+        dtype=np.float64
+    )
+    diamond_dm += diamond_dm.T
+
+    gens = ripser(diamond_dm, metric="precomputed", maxdim=2,
+                  return_generators=True)["gens"]
+
+    gens_fin_dim1 = gens[1][1]
+
+    # With this example no finite generators in dimension 1 shall be found
+    assert len(gens_fin_dim1) == 0