Skip to content

Commit

Permalink
Merge pull request #65 from giotto-ai/main
Browse files Browse the repository at this point in the history
Create v0.2.2
  • Loading branch information
ulupo committed Aug 16, 2022
2 parents fb0fcef + ab0454d commit c5465b7
Show file tree
Hide file tree
Showing 10 changed files with 265 additions and 132 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: [3.6, 3.7, 3.8, 3.9, '3.10']
python-version: [3.7, 3.8, 3.9, '3.10']
include:
- os: ubuntu-latest
path: ~/.cache/pip
Expand Down
16 changes: 4 additions & 12 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,19 @@ jobs:
- uses: actions/checkout@v2

- name: Build wheels
uses: pypa/cibuildwheel@v2.2.2
uses: pypa/cibuildwheel@v2.3.1
env:
# Specify which Python versions to build wheels
# https://cibuildwheel.readthedocs.io/en/stable/options/#build-skip
CIBW_BUILD: "cp36-* cp37-* cp38-* cp39-*"
CIBW_BUILD: "cp37-* cp38-* cp39-* cp310-*"
# Skip 32 bit architectures, musllinux, and i686
CIBW_SKIP: "*-win32 *-musllinux_x86_64 *_i686"
CIBW_BEFORE_BUILD: python -m pip install cmake
CIBW_TEST_COMMAND: python -m pytest {package}/gph/python/test
CIBW_TEST_REQUIRES: pytest hypothesis

# SciPy do not ship manylinux2010 wheels for Python 3.10, so we too build ours for manylinux2014
- name: Build wheels for Python 3.10
uses: pypa/cibuildwheel@v2.2.2
env:
CIBW_BUILD: "cp310-*"
CIBW_SKIP: "*-win32 *-musllinux_x86_64 *_i686"
CIBW_BEFORE_BUILD: python -m pip install cmake
CIBW_TEST_COMMAND: python -m pytest {package}/gph/python/test
CIBW_TEST_REQUIRES: pytest hypothesis
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
# Should generate universal2 wheels for CP3.8 -- CP3.10
CIBW_ARCHS_MACOS: universal2

- uses: actions/upload-artifact@v2
name: Upload wheels
Expand Down
31 changes: 31 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,34 @@
Release 0.2.2
=============

Minor release bringing bug fixes, performance improvements, wheels for Apple Silicon, and EOS for Python 3.6.

Major Features and Improvements
-------------------------------

- The dense matrix C++ backend has been extended to allow for nonzero vertex weights. This can lead to large speedups when computing weighted Rips filtrations (`#61 <https://github.com/giotto-ai/giotto-ph/pull/61>`_).
- The binary search routine to find the largest-indexed vertex in a simplex (``get_max_vertex`` in the C++ backend, as in ``Ripser``) has been replaced with a faster floating-point routine in the case of 1-simplices (edges). This still gives exact results for all cases of interest, and can be substantially faster (`#38 <https://github.com/giotto-ai/giotto-ph/pull/38>`_).
- Wheels for Apple Silicon are now available for Python versions 3.8, 3.9 and 3.10 (`#62 <https://github.com/giotto-ai/giotto-ph/pull/62>`_).

Bug Fixes
---------

- Bars in the barcode with death at ``numpy.inf`` are now explicitly treated as essential bars instead of finite bars (`#53 <https://github.com/giotto-ai/giotto-ph/pull/53>`_).

Backwards-Incompatible Changes
------------------------------

- Python 3.6 is no longer supported, and the manylinux standard has been bumped from ``manylinux2010`` to ``manylinux2014`` (`#62 <https://github.com/giotto-ai/giotto-ph/pull/62>`_).

Thanks to our Contributors
--------------------------

This release contains contributions from many people:

Umberto Lupo and Julian Burella Pérez.

We are also grateful to all who filed issues or helped resolve them, asked and answered questions, and were part of inspiring discussions.

Release 0.2.1
=============

Expand Down
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
author = 'Julian Burella Perez'

# The full version, including alpha/beta/rc tags
release = '0.2.1'
release = '0.2.2'


# -- General configuration ---------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion gph/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,4 @@
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#

__version__ = "0.2.1"
__version__ = "0.2.2"
24 changes: 14 additions & 10 deletions gph/bindings/ripser_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,25 +68,30 @@ PYBIND11_MODULE(gph_ripser, m)

m.def(
"rips_dm",
[](py::array_t<value_t>& D, int N, int modulus, int dim_max,
float threshold, int num_threads, bool return_generators) {
[](py::array_t<value_t>& D, py::array_t<value_t>& diag, int modulus,
int dim_max, float threshold, int num_threads,
bool return_generators) {
// Setup distance matrix and figure out threshold
auto D_ = static_cast<value_t*>(D.request().ptr);
std::vector<value_t> distances(D_, D_ + N);
std::vector<value_t> distances(D_, D_ + D.size());
auto diag_ = static_cast<value_t*>(diag.request().ptr);
std::vector<value_t> diagonal(diag_, diag_ + diag.size());

compressed_lower_distance_matrix dist =
compressed_lower_distance_matrix(
compressed_upper_distance_matrix(std::move(distances)));
compressed_upper_distance_matrix(std::move(distances),
std::move(diagonal)));

ripserResults res;
ripser<compressed_lower_distance_matrix> r(
std::move(dist), dim_max, threshold, modulus,
num_threads, return_generators);
std::move(dist), dim_max, threshold, modulus, num_threads,
return_generators);
r.compute_barcodes();
r.copy_results(res);
return res;
},
"D"_a, "N"_a, "modulus"_a, "dim_max"_a, "threshold"_a, "num_threads"_a,
"return_generators"_a, "ripser distance matrix");
"D"_a, "diag"_a, "modulus"_a, "dim_max"_a, "threshold"_a,
"num_threads"_a, "return_generators"_a, "ripser distance matrix");

m.def(
"rips_dm_sparse",
Expand All @@ -100,8 +105,7 @@ PYBIND11_MODULE(gph_ripser, m)
// Setup distance matrix and figure out threshold
ripser<sparse_distance_matrix> r(
sparse_distance_matrix(I_, J_, V_, NEdges, N, threshold),
dim_max, threshold, modulus, num_threads,
return_generators);
dim_max, threshold, modulus, num_threads, return_generators);
r.compute_barcodes();

ripserResults res;
Expand Down
71 changes: 40 additions & 31 deletions gph/python/ripser_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from warnings import catch_warnings, simplefilter

import numpy as np
from scipy.sparse import issparse, csr_matrix, coo_matrix, triu
from scipy.sparse import issparse, csr_matrix, triu
from scipy.spatial.distance import squareform
from sklearn.exceptions import EfficiencyWarning
from sklearn.metrics.pairwise import pairwise_distances
Expand All @@ -34,16 +34,14 @@
MAX_COEFF_SUPPORTED = gph_ripser.get_max_coefficient_field_supported()


def _compute_ph_vr_dense(DParam, maxHomDim, thresh=-1, coeff=2, n_threads=1,
return_generators=False):
def _compute_ph_vr_dense(DParam, diagonal, maxHomDim, thresh=-1, coeff=2,
n_threads=1, return_generators=False):
if coeff == 2:
ret = gph_ripser.rips_dm(DParam, DParam.shape[0], coeff,
maxHomDim, thresh, n_threads,
return_generators)
ret = gph_ripser.rips_dm(DParam, diagonal, coeff, maxHomDim, thresh,
n_threads, return_generators)
else:
ret = gph_ripser_coeff.rips_dm(DParam, DParam.shape[0], coeff,
maxHomDim, thresh, n_threads,
return_generators)
ret = gph_ripser_coeff.rips_dm(DParam, diagonal, coeff, maxHomDim,
thresh, n_threads, return_generators)
return ret


Expand Down Expand Up @@ -108,6 +106,27 @@ def _collapse_coo(row, col, data, thresh):
np.hstack([data_diag, data]))


def _collapse_dense(dm, thresh):
"""Run edge collapser on off-diagonal data and then reinsert diagonal
data if any non-zero value is present."""

# Use 32-bit float precision here so when diagonal is extracted,
# it is still 32-bit in the entire function operations.
dm = dm.astype(np.float32)

row, col, data = \
gph_collapser.flag_complex_collapse_edges_dense(dm, thresh)

data_diag = dm.diagonal()
if (data_diag != 0).any():
indices = np.arange(data_diag.shape[0])
row = np.hstack([indices, row])
col = np.hstack([indices, col])
data = np.hstack([data_diag, data])

return row, col, data


def _compute_dtm_weights(dm, n_neighbors, weights_r):
with catch_warnings():
simplefilter("ignore", category=EfficiencyWarning)
Expand Down Expand Up @@ -558,30 +577,19 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
if weights is not None:
dm = _compute_weights(dm, weights, weight_params, n_points)

compute_enclosing_radius = False
nonzero_in_diag = (dm.diagonal() != 0).any()
if not nonzero_in_diag:
apply_user_threshold = thresh != np.inf
if not apply_user_threshold:
# Compute ideal threshold only when a distance matrix is passed
# as input without specifying any threshold
# We check if any element and if no entries is present in the
# diagonal. This allows to have the enclosing radius before
# calling collapser if computed
if thresh == np.inf:
thresh = _ideal_thresh(dm, thresh)
compute_enclosing_radius = True

if nonzero_in_diag:
# Convert to sparse format, because currently that's the only one
# handling nonzero births
(row, col) = np.triu_indices_from(dm)
data = dm[(row, col)]
if collapse_edges:
row, col, data = _collapse_coo(row, col, data, thresh)
elif collapse_edges:
row, col, data = gph_collapser.\
flag_complex_collapse_edges_dense(dm.astype(np.float32),
thresh)
elif not compute_enclosing_radius:
thresh = _ideal_thresh(dm, thresh)

if collapse_edges:
row, col, data = _collapse_dense(dm, thresh)

elif apply_user_threshold:
# If the user specifies a threshold, we use a sparse representation
# like Ripser does
row, col = np.nonzero(dm <= thresh)
Expand All @@ -604,10 +612,11 @@ def ripser_parallel(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
return_generators
)
else:
# Only consider strict upper diagonal
# Only consider upper diagonal
diagonal = np.diagonal(dm).astype(np.float32)
DParam = squareform(dm, checks=False).astype(np.float32)
res = _compute_ph_vr_dense(DParam, maxdim, thresh, coeff, n_threads,
return_generators)
res = _compute_ph_vr_dense(DParam, diagonal, maxdim, thresh,
coeff, n_threads, return_generators)

# Unwrap persistence diagrams
# Barcodes must match the inner type of C++ core filtration value.
Expand Down
67 changes: 56 additions & 11 deletions gph/python/test/test_ripser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
from gph import ripser_parallel as ripser


def make_dm_symmetric(dm):
# Extract strict upper diagonal and make symmetric
dm = np.triu(dm.astype(np.float32), k=1)
return dm + dm.T


@composite
def get_dense_distance_matrices(draw):
"""Generate 2d dense square arrays of floats, with zero along the
Expand All @@ -22,10 +28,7 @@ def get_dense_distance_matrices(draw):
exclude_min=True,
width=32),
shape=(shapes, shapes), unique=False))
# Extract strict upper diagonal and make symmetric
dm = np.triu(dm.astype(np.float32), k=1)
dm = dm + dm.T
return dm
return make_dm_symmetric(dm)


@composite
Expand Down Expand Up @@ -133,7 +136,7 @@ def test_collapser_with_negative_weights():
"""Test that collapser works as expected when some of the vertex and edge
weights are negative."""
n_points = 20
dm = np.random.random((n_points, n_points))
dm = make_dm_symmetric(np.random.random((n_points, n_points)))
np.fill_diagonal(dm, -np.random.random(n_points))
dm -= 0.2
dm_sparse = coo_matrix(dm)
Expand Down Expand Up @@ -294,12 +297,12 @@ def test_gens_order_vertices_higher_dimension():
one in the reverse colexicographic order used to build the simplexwise
refinement of the Vietoris-Rips filtration."""
diamond = np.array(
[[0, 1, 100, 1, 1, 1 ],
[0, 0, 1, 100, 1, 1 ],
[0, 0, 0, 1, 1, 1 ],
[0, 0, 0, 0, 1, 1 ],
[0, 0, 0, 0, 0, 100],
[0, 0, 0, 0, 0, 0 ]],
[[0, 1, 100, 1, 1, 1],
[0, 0, 1, 100, 1, 1],
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0, 100],
[0, 0, 0, 0, 0, 0]],
dtype=np.float64)

diamond += diamond.T
Expand Down Expand Up @@ -431,3 +434,45 @@ def test_unsupported_coefficient():
with pytest.raises(ValueError):
ripser(X, metric='precomputed',
coeff=gph_ripser.get_max_coefficient_field_supported()+1)


@settings(deadline=500)
@given(dm_dense=get_dense_distance_matrices())
def test_non_0_diagonal_internal_representation(dm_dense):
"""Checks that, when passing a full distance matrix with non-zero values in
the diagonal, the result is the same regardless of whether the input is in
dense or sparse format."""
diagonal = np.random.random(dm_dense.shape[0])

# Ensure that all entries are bigger than the diagonal
dm_dense = dm_dense + 1
np.fill_diagonal(dm_dense, diagonal)

dgms1 = ripser(dm_dense, maxdim=2, metric='precomputed')['dgms']
dgms2 = ripser(coo_matrix(dm_dense), maxdim=2,
metric='precomputed')['dgms']

for bars1, bars2 in zip(dgms1, dgms2):
assert_array_equal(bars1, bars2)


def test_infinite_deaths_always_essential():
"""Regression test for issue #37"""
diamond_dm = np.array(
[[0, 1, np.inf, 1, 1, 1],
[0, 0, 1, np.inf, 1, 1],
[0, 0, 0, 1, 1, 1],
[0, 0, 0, 0, 1, 1],
[0, 0, 0, 0, 0, np.inf],
[0, 0, 0, 0, 0, 0]],
dtype=np.float64
)
diamond_dm += diamond_dm.T

gens = ripser(diamond_dm, metric="precomputed", maxdim=2,
return_generators=True)["gens"]

gens_fin_dim1 = gens[1][1]

# With this example no finite generators in dimension 1 shall be found
assert len(gens_fin_dim1) == 0

0 comments on commit c5465b7

Please sign in to comment.