From 05b99fd83640307c5618090dec86d6e06f45487d Mon Sep 17 00:00:00 2001 From: julian Date: Fri, 28 Aug 2020 15:30:30 +0200 Subject: [PATCH 01/27] [WIP] Add bindings for collapser Currently supports Sparse and Dense representation as input returns a sparse matrix Signed-off-by: julian --- CMakeLists.txt | 24 +++++ .../externals/bindings/collapser_bindings.cpp | 90 +++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 gtda/externals/bindings/collapser_bindings.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index b6a514340..95bd63850 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -334,3 +334,27 @@ else() target_compile_options(gtda_cech_complex PUBLIC $<$:-O2 -ggdb -D_GLIBCXX_DEBUG>) endif() +####################################################################### +# Collapser # +####################################################################### + +pybind11_add_module(gtda_collapser "${BINDINGS_DIR}/collapser_bindings.cpp") + +if(OpenMP_FOUND) + target_link_libraries(gtda_collapser PRIVATE OpenMP::OpenMP_CXX) +endif() + +target_link_libraries(gtda_collapser LINK_PUBLIC ${Boost_LIBRARIES}) +target_compile_definitions(gtda_collapser PRIVATE BOOST_RESULT_OF_USE_DECLTYPE=1 BOOST_ALL_NO_LIB=1 BOOST_SYSTEM_NO_DEPRECATED=1) + +target_include_directories(gtda_collapser PRIVATE "${GUDHI_SRC_DIR}/common/include") +target_include_directories(gtda_collapser PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") +target_include_directories(gtda_collapser PRIVATE "${EIGEN_DIR}") + +if(MSVC) + target_compile_options(gtda_collapser PUBLIC $<$: /O2 /Wall /fp:strict>) + target_compile_options(gtda_collapser PUBLIC $<$:/O1 /DEBUG:FULL /Zi /Zo>) +else() + target_compile_options(gtda_collapser PUBLIC $<$: -Ofast -shared -pthread -fPIC -fwrapv -Wall -fno-strict-aliasing -frounding-math>) + target_compile_options(gtda_collapser PUBLIC $<$:-O2 -ggdb -D_GLIBCXX_DEBUG>) +endif() diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp new file mode 100644 index 000000000..29b5d5691 --- /dev/null +++ b/gtda/externals/bindings/collapser_bindings.cpp @@ -0,0 +1,90 @@ +/****************************************************************************** + * Description: gudhi's collapser interfacing with pybind11 + * License: AGPL3 + *****************************************************************************/ + +#include + +#include +#include +#include + +namespace py = pybind11; + +/* GUDHI Collapser require types */ +using Filtration_value = float; +using Vertex_handle = uint32_t; +using Filtered_edge = + std::tuple; +using Filtered_edge_list = std::vector; + +/* sparse matrix input types */ +using Sparse_matrix = Eigen::SparseMatrix; +using triplet_vec = Eigen::Triplet; + +/* dense matrix input types */ +using Distance_matrix = std::vector>; + +/* generates a sparse matrix from a filtered edges list + * This function is called after computing edges collapsing + */ +static Sparse_matrix generate_sparse_matrix( + Filtered_edge_list& collapse_edges) { + std::vector triplets; + /* Create triplets to efficiently create a return matrix */ + for (auto& t : collapse_edges) { + triplets.push_back( + triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t))); + } + + Sparse_matrix mat(collapse_edges.size(), collapse_edges.size()); + mat.setFromTriplets(triplets.begin(), triplets.end()); + + return mat; +} + +PYBIND11_MODULE(gtda_collapser, m) { + using namespace pybind11::literals; + + m.doc() = "Collapser bindings for Gudhi implementation"; + m.def("flag_complex_collapse_edges", + [](Sparse_matrix& graph_) { + Filtered_edge_list graph; + + /* Convert from Sparse format to Filtered edge list */ + for (size_t k = 0; k < graph_.outerSize(); ++k) + for (Eigen::SparseMatrix::InnerIterator it(graph_, + k); + it; ++it) { + graph.push_back(Filtered_edge(it.row(), it.col(), it.value())); + } + + /* Start collapser */ + auto vec_triples = + Gudhi::collapse::flag_complex_collapse_edges(graph); + return generate_sparse_matrix(vec_triples); + }, + "sparse_matrix"_a, + "Implicitly constructs a flag complex from edges, " + "collapses edges while preserving the persistent homology"); + + m.def("flag_complex_collapse_edges", + [](Distance_matrix& dm) { + Filtered_edge_list graph; + std::vector triplets; + + /* Convert from Sparse format to Filtered edge list */ + for (size_t i = 0; i < dm.size(); i++) + for (size_t j = 0; j < dm[i].size(); j++) + if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j])); + + /* Start collapser */ + auto vec_triples = + Gudhi::collapse::flag_complex_collapse_edges(graph); + + return generate_sparse_matrix(vec_triples); + }, + "dense_matrix"_a, + "Implicitly constructs a flag complex from edges, " + "collapses edges while preserving the persistent homology"); +} From 23dc59df59fd78e1c9c5a359418a9d057e198511 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sat, 29 Aug 2020 21:35:39 +0200 Subject: [PATCH 02/27] Attempt fix of collapser bindings --- gtda/externals/bindings/collapser_bindings.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp index 29b5d5691..af87a4a60 100644 --- a/gtda/externals/bindings/collapser_bindings.cpp +++ b/gtda/externals/bindings/collapser_bindings.cpp @@ -29,15 +29,15 @@ using Distance_matrix = std::vector>; * This function is called after computing edges collapsing */ static Sparse_matrix generate_sparse_matrix( - Filtered_edge_list& collapse_edges) { + Filtered_edge_list& collapsed_edges, int size) { std::vector triplets; /* Create triplets to efficiently create a return matrix */ - for (auto& t : collapse_edges) { + for (auto& t : collapsed_edges) { triplets.push_back( triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t))); } - Sparse_matrix mat(collapse_edges.size(), collapse_edges.size()); + Sparse_matrix mat(size, size); mat.setFromTriplets(triplets.begin(), triplets.end()); return mat; @@ -52,7 +52,8 @@ PYBIND11_MODULE(gtda_collapser, m) { Filtered_edge_list graph; /* Convert from Sparse format to Filtered edge list */ - for (size_t k = 0; k < graph_.outerSize(); ++k) + int size = graph_.outerSize(); + for (size_t k = 0; k < size; ++k) for (Eigen::SparseMatrix::InnerIterator it(graph_, k); it; ++it) { @@ -62,7 +63,7 @@ PYBIND11_MODULE(gtda_collapser, m) { /* Start collapser */ auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - return generate_sparse_matrix(vec_triples); + return generate_sparse_matrix(vec_triples, size); }, "sparse_matrix"_a, "Implicitly constructs a flag complex from edges, " @@ -82,7 +83,7 @@ PYBIND11_MODULE(gtda_collapser, m) { auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - return generate_sparse_matrix(vec_triples); + return generate_sparse_matrix(vec_triples, dm.size()); }, "dense_matrix"_a, "Implicitly constructs a flag complex from edges, " From 88bfbfc11a007d335147a8aeae8d578d448f7564 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sun, 30 Aug 2020 11:22:48 +0200 Subject: [PATCH 03/27] Split flag_complex_collapse_edges into a _sparse and a _dense version --- gtda/externals/bindings/collapser_bindings.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp index af87a4a60..f2c8227d8 100644 --- a/gtda/externals/bindings/collapser_bindings.cpp +++ b/gtda/externals/bindings/collapser_bindings.cpp @@ -11,22 +11,22 @@ namespace py = pybind11; -/* GUDHI Collapser require types */ +/* GUDHI Collapser required types */ using Filtration_value = float; using Vertex_handle = uint32_t; using Filtered_edge = std::tuple; using Filtered_edge_list = std::vector; -/* sparse matrix input types */ +/* Sparse matrix input types */ using Sparse_matrix = Eigen::SparseMatrix; using triplet_vec = Eigen::Triplet; -/* dense matrix input types */ +/* Dense matrix input types */ using Distance_matrix = std::vector>; -/* generates a sparse matrix from a filtered edges list - * This function is called after computing edges collapsing +/* Generates a sparse matrix from a filtered edges list + * This function is called after computing edge collapse */ static Sparse_matrix generate_sparse_matrix( Filtered_edge_list& collapsed_edges, int size) { @@ -47,11 +47,11 @@ PYBIND11_MODULE(gtda_collapser, m) { using namespace pybind11::literals; m.doc() = "Collapser bindings for Gudhi implementation"; - m.def("flag_complex_collapse_edges", + m.def("flag_complex_collapse_edges_sparse", [](Sparse_matrix& graph_) { Filtered_edge_list graph; - /* Convert from Sparse format to Filtered edge list */ + /* Convert from sparse format to Filtered_edge_list */ int size = graph_.outerSize(); for (size_t k = 0; k < size; ++k) for (Eigen::SparseMatrix::InnerIterator it(graph_, @@ -69,12 +69,12 @@ PYBIND11_MODULE(gtda_collapser, m) { "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); - m.def("flag_complex_collapse_edges", + m.def("flag_complex_collapse_edges_dense", [](Distance_matrix& dm) { Filtered_edge_list graph; std::vector triplets; - /* Convert from Sparse format to Filtered edge list */ + /* Convert from dense format to Filtered edge list */ for (size_t i = 0; i < dm.size(); i++) for (size_t j = 0; j < dm[i].size(); j++) if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j])); From 53f9c0ed8a972c343a9d12b5c520dd3cb022ce82 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sun, 30 Aug 2020 11:25:13 +0200 Subject: [PATCH 04/27] Clean Cech test docstring --- gtda/externals/python/tests/test_cech_complex.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/gtda/externals/python/tests/test_cech_complex.py b/gtda/externals/python/tests/test_cech_complex.py index ae9534d8e..a81ce1aa4 100644 --- a/gtda/externals/python/tests/test_cech_complex.py +++ b/gtda/externals/python/tests/test_cech_complex.py @@ -1,9 +1,5 @@ from .. import CechComplex -""" Test comes from - -""" - def test_minimal_cech(): points = [[1, 2]] From 6141e69ccce30e04f13331051ca54845d88d7105 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sun, 30 Aug 2020 13:39:17 +0200 Subject: [PATCH 05/27] Linting in ripser_interface --- gtda/externals/python/ripser_interface.py | 36 +++++++++++------------ 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index 92904699c..b8bbabf1e 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -111,17 +111,16 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", Parameters ---------- X: ndarray (n_samples, n_features) - A numpy array of either data or distance matrix. - Can also be a sparse distance matrix of type scipy.sparse + A numpy array of either data or distance matrix. Can also be a sparse + distance matrix of type scipy.sparse maxdim: int, optional, default 1 - Maximum homology dimension computed. Will compute all dimensions - lower than and equal to this value. - For 1, H_0 and H_1 will be computed. + Maximum homology dimension computed. Will compute all dimensions lower + than and equal to this value. For 1, H_0 and H_1 will be computed. - thresh: float, default infinity - Maximum distances considered when constructing filtration. - If infinity, compute the entire filtration. + thresh: float, default ``numpy.inf`` + Maximum distances considered when constructing filtration. If + ``numpy.inf``, compute the entire filtration. coeff: int prime, default 2 Compute homology with coefficients in the prime field Z/pZ for p=coeff. @@ -129,19 +128,18 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", metric: string or callable The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options - specified in pairwise_distances, including "euclidean", "manhattan", - or "cosine". Alternatively, if metric is a callable function, it is - called on each pair of instances (rows) and the resulting value - recorded. The callable should take two arrays from X as input and - return a value indicating the distance between them. + specified in pairwise_distances, including "euclidean", "manhattan", or + "cosine". Alternatively, if metric is a callable function, it is called + on each pair of instances (rows) and the resulting value recorded. The + callable should take two arrays from X as input and return a value + indicating the distance between them. n_perm: int - The number of points to subsample in a "greedy permutation," - or a furthest point sampling of the points. These points - will be used in lieu of the full point cloud for a faster - computation, at the expense of some accuracy, which can - be bounded as a maximum bottleneck distance to all diagrams - on the original point set + The number of points to subsample in a "greedy permutation", or a + furthest point sampling of the points. These points will be used in + lieu of the full point cloud for a faster computation, at the expense + of some accuracy, which can be bounded as a maximum bottleneck distance + to all diagrams on the original point set. Returns ------- From 085d9b95723aa3cdb486178aa6afe0c2440898f4 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sun, 30 Aug 2020 21:51:14 +0200 Subject: [PATCH 06/27] Make more collapser bindings which compile --- .../externals/bindings/collapser_bindings.cpp | 65 ++++++++++++++++++- 1 file changed, 62 insertions(+), 3 deletions(-) diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp index f2c8227d8..92d5ee9d3 100644 --- a/gtda/externals/bindings/collapser_bindings.cpp +++ b/gtda/externals/bindings/collapser_bindings.cpp @@ -22,10 +22,16 @@ using Filtered_edge_list = std::vector; using Sparse_matrix = Eigen::SparseMatrix; using triplet_vec = Eigen::Triplet; +/* COO data input types */ +using Row_idx = std::vector; +using Col_idx = std::vector; +using Filtration_values = std::vector; +using COO_data = std::tuple; + /* Dense matrix input types */ using Distance_matrix = std::vector>; -/* Generates a sparse matrix from a filtered edges list +/* Generates a sparse matrix from a filtered edge list * This function is called after computing edge collapse */ static Sparse_matrix generate_sparse_matrix( @@ -43,10 +49,27 @@ static Sparse_matrix generate_sparse_matrix( return mat; } +/* Generates COO sparse matrix data from a filtered edge list + * This function is called after computing edge collapse + */ +static COO_data coo_data_from_filtered_edge_list( + Filtered_edge_list& collapsed_edges) { + Row_idx row; + Col_idx col; + Filtration_values data; + for (auto& t : collapsed_edges) { + row.push_back(std::get<0>(t)); + col.push_back(std::get<1>(t)); + data.push_back(std::get<2>(t)); + } + + return COO_data(row, col, data); +} + PYBIND11_MODULE(gtda_collapser, m) { using namespace pybind11::literals; - m.doc() = "Collapser bindings for Gudhi implementation"; + m.doc() = "Collapser bindings for GUDHI implementation"; m.def("flag_complex_collapse_edges_sparse", [](Sparse_matrix& graph_) { Filtered_edge_list graph; @@ -69,10 +92,26 @@ PYBIND11_MODULE(gtda_collapser, m) { "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); + m.def("flag_complex_collapse_edges_from_coo_data_to_coo_data", + [](Row_idx& row, Col_idx& col, Filtration_values& data) { + Filtered_edge_list graph; + + /* Convert from COO input format to Filtered_edge_list */ + int size = data.size(); + for (size_t k = 0; k < size; ++k) graph.push_back(Filtered_edge(row[k], col[k], data[k])); + + /* Start collapser */ + auto vec_triples = + Gudhi::collapse::flag_complex_collapse_edges(graph); + return coo_data_from_filtered_edge_list(vec_triples); + }, + "row"_a, "column"_a, "data"_a, + "Implicitly constructs a flag complex from edges, " + "collapses edges while preserving the persistent homology"); + m.def("flag_complex_collapse_edges_dense", [](Distance_matrix& dm) { Filtered_edge_list graph; - std::vector triplets; /* Convert from dense format to Filtered edge list */ for (size_t i = 0; i < dm.size(); i++) @@ -88,4 +127,24 @@ PYBIND11_MODULE(gtda_collapser, m) { "dense_matrix"_a, "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); + + m.def("flag_complex_collapse_edges_from_dense_to_coo_data", + [](Distance_matrix& dm) { + Filtered_edge_list graph; + + /* Convert from dense format to Filtered edge list */ + for (size_t i = 0; i < dm.size(); i++) + for (size_t j = 0; j < dm[i].size(); j++) + if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j])); + + /* Start collapser */ + auto vec_triples = + Gudhi::collapse::flag_complex_collapse_edges(graph); + + return coo_data_from_filtered_edge_list(vec_triples); + }, + "dense_matrix"_a, + "Implicitly constructs a flag complex from edges, " + "collapses edges while preserving the persistent homology"); + } From 55d8c58583cd7e055b77f2c6a0be4f7c62151909 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sun, 30 Aug 2020 22:56:52 +0200 Subject: [PATCH 07/27] Add collapse_edges kwarg to ripser to enable edge collapser --- gtda/externals/python/ripser_interface.py | 65 ++++++++++++++++++----- 1 file changed, 53 insertions(+), 12 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index b8bbabf1e..674cb0de8 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -1,7 +1,14 @@ from scipy import sparse import numpy as np from sklearn.metrics.pairwise import pairwise_distances -from ..modules import gtda_ripser, gtda_ripser_coeff +from ..modules import gtda_ripser, gtda_ripser_coeff, gtda_collapser + + +def _lexsort_coo_data(row, col, data): + lex_sort_idx = np.lexsort((col, row)) + row, col, data = \ + row[lex_sort_idx], col[lex_sort_idx], data[lex_sort_idx] + return row, col, data def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=1): @@ -100,11 +107,11 @@ def get_greedy_perm(X, n_perm=None, metric="euclidean"): ds = np.minimum(ds, dperm2all[-1]) lambdas[-1] = np.max(ds) dperm2all = np.array(dperm2all) - return (idx_perm, lambdas, dperm2all) + return idx_perm, lambdas, dperm2all def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", - n_perm=None): + n_perm=None, collapse_edges=False): """Compute persistence diagrams for X data array. If X is not a distance matrix, it will be converted to a distance matrix using the chosen metric. @@ -141,6 +148,8 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", of some accuracy, which can be bounded as a maximum bottleneck distance to all diagrams on the original point set. + collapse_edges : TODO + Returns ------- A dictionary holding all of the results of the computation @@ -205,18 +214,29 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", dm = sparse.coo_matrix(dm) if sparse.issparse(dm): - if sparse.isspmatrix_coo(dm): + if sparse.isspmatrix_coo(dm) and not collapse_edges: # If the matrix is already COO, we need to order the row and column # indices lexicographically to avoid errors. See # https://github.com/scikit-tda/ripser.py/issues/103 - row, col, data = dm.row, dm.col, dm.data - lex_sort_idx = np.lexsort((col, row)) - row, col, data = \ - row[lex_sort_idx], col[lex_sort_idx], data[lex_sort_idx] + row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data) else: # Lexicographic sorting is performed by scipy upon conversion coo = dm.tocoo() row, col, data = coo.row, coo.col, coo.data + if collapse_edges: + # Threshold first. Ideally, this would be implemented as a + # modification of the C++ code + if thresh < np.inf: + thresholded_idx = np.flatnonzero(data <= thresh) + data = data[thresholded_idx] + row = row[thresholded_idx] + col = col[thresholded_idx] + row, col, data = \ + gtda_collapser.flag_complex_collapse_edges_from_coo_data_to_coo_data(row, col, data) + row, col, data = _lexsort_coo_data( + np.asarray(row), np.asarray(col), np.asarray(data) + ) + res = DRFDMSparse( row.astype(dtype=np.int32, order="C"), col.astype(dtype=np.int32, order="C"), @@ -224,12 +244,33 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", n_points, maxdim, thresh, - coeff, + coeff ) else: - I, J = np.meshgrid(np.arange(n_points), np.arange(n_points)) - DParam = np.array(dm[I > J], dtype=np.float32) - res = DRFDM(DParam, maxdim, thresh, coeff) + if collapse_edges: + # Threshold first. Ideally, this would be implemented as a + # modification of the C++ code + if thresh < np.inf: + dm = np.where(dm <= thresh, dm, np.inf) + row, col, data = \ + gtda_collapser.flag_complex_collapse_edges_from_dense_to_coo_data(dm) + row, col, data = \ + _lexsort_coo_data(np.asarray(row), np.asarray(col), + np.asarray(data)) + res = DRFDMSparse( + row.astype(dtype=np.int32, order="C"), + col.astype(dtype=np.int32, order="C"), + np.array(data, dtype=np.float32, order="C"), + n_points, + maxdim, + thresh, + coeff + ) + else: + # Only consider upper diagonal + I, J = np.meshgrid(np.arange(n_points), np.arange(n_points)) + DParam = np.array(dm[I > J], dtype=np.float32) + res = DRFDM(DParam, maxdim, thresh, coeff) # Unwrap persistence diagrams dgms = res["births_and_deaths_by_dim"] From 0b6663caa70574269e64e925624c84d09412556f Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 31 Aug 2020 13:53:37 +0200 Subject: [PATCH 08/27] Update collapser bindings * All functions have the same name, supporting different input types * All functions returns now tuple Signed-off-by: julian --- gtda/externals/__init__.py | 8 ++- .../externals/bindings/collapser_bindings.cpp | 60 ++++++++----------- 2 files changed, 31 insertions(+), 37 deletions(-) diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py index 00bffafff..3e800ecfb 100644 --- a/gtda/externals/__init__.py +++ b/gtda/externals/__init__.py @@ -3,18 +3,20 @@ from .modules.gtda_bottleneck import bottleneck_distance from .modules.gtda_wasserstein import wasserstein_distance +from .modules.gtda_collapser import flag_complex_collapse_edges from .python import ripser, SparseRipsComplex, CechComplex, CubicalComplex, \ - PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex + PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex \ __all__ = [ 'bottleneck_distance', 'wasserstein_distance', 'ripser', 'SparseRipsComplex', - 'CechComplex', + 'CechComplex', 'CubicalComplex', 'PeriodicCubicalComplex', 'SimplexTree', 'WitnessComplex', - 'StrongWitnessComplex' + 'StrongWitnessComplex', + 'flag_complex_collapse_edges' ] diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp index 92d5ee9d3..b823c4b7f 100644 --- a/gtda/externals/bindings/collapser_bindings.cpp +++ b/gtda/externals/bindings/collapser_bindings.cpp @@ -23,9 +23,9 @@ using Sparse_matrix = Eigen::SparseMatrix; using triplet_vec = Eigen::Triplet; /* COO data input types */ -using Row_idx = std::vector; -using Col_idx = std::vector; -using Filtration_values = std::vector; +using Row_idx = std::vector; +using Col_idx = std::vector; +using Filtration_values = std::vector; using COO_data = std::tuple; /* Dense matrix input types */ @@ -33,14 +33,18 @@ using Distance_matrix = std::vector>; /* Generates a sparse matrix from a filtered edge list * This function is called after computing edge collapse + * At the moment this function is unused because Eigen only manages + * CSR sparse format, but in the case of giotto-tda we need COO format */ -static Sparse_matrix generate_sparse_matrix( - Filtered_edge_list& collapsed_edges, int size) { +static Sparse_matrix generate_sparse_matrix(Filtered_edge_list& collapsed_edges, + size_t size) { std::vector triplets; /* Create triplets to efficiently create a return matrix */ for (auto& t : collapsed_edges) { triplets.push_back( triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t))); + std::cout << std::get<0>(t) << ", " << std::get<1>(t) << " : " + << std::get<2>(t) << "\n"; } Sparse_matrix mat(size, size); @@ -52,11 +56,16 @@ static Sparse_matrix generate_sparse_matrix( /* Generates COO sparse matrix data from a filtered edge list * This function is called after computing edge collapse */ -static COO_data coo_data_from_filtered_edge_list( - Filtered_edge_list& collapsed_edges) { +static COO_data gen_coo_matrix(Filtered_edge_list& collapsed_edges) { Row_idx row; Col_idx col; Filtration_values data; + + /* allocate memory beforehand */ + row.reserve(collapsed_edges.size()); + col.reserve(collapsed_edges.size()); + data.reserve(collapsed_edges.size()); + for (auto& t : collapsed_edges) { row.push_back(std::get<0>(t)); col.push_back(std::get<1>(t)); @@ -70,7 +79,7 @@ PYBIND11_MODULE(gtda_collapser, m) { using namespace pybind11::literals; m.doc() = "Collapser bindings for GUDHI implementation"; - m.def("flag_complex_collapse_edges_sparse", + m.def("flag_complex_collapse_edges", [](Sparse_matrix& graph_) { Filtered_edge_list graph; @@ -86,49 +95,33 @@ PYBIND11_MODULE(gtda_collapser, m) { /* Start collapser */ auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - return generate_sparse_matrix(vec_triples, size); + + return gen_coo_matrix(vec_triples); }, "sparse_matrix"_a, "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); - m.def("flag_complex_collapse_edges_from_coo_data_to_coo_data", + m.def("flag_complex_collapse_edges", [](Row_idx& row, Col_idx& col, Filtration_values& data) { Filtered_edge_list graph; /* Convert from COO input format to Filtered_edge_list */ int size = data.size(); - for (size_t k = 0; k < size; ++k) graph.push_back(Filtered_edge(row[k], col[k], data[k])); - - /* Start collapser */ - auto vec_triples = - Gudhi::collapse::flag_complex_collapse_edges(graph); - return coo_data_from_filtered_edge_list(vec_triples); - }, - "row"_a, "column"_a, "data"_a, - "Implicitly constructs a flag complex from edges, " - "collapses edges while preserving the persistent homology"); - - m.def("flag_complex_collapse_edges_dense", - [](Distance_matrix& dm) { - Filtered_edge_list graph; - - /* Convert from dense format to Filtered edge list */ - for (size_t i = 0; i < dm.size(); i++) - for (size_t j = 0; j < dm[i].size(); j++) - if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j])); + for (size_t k = 0; k < size; ++k) + graph.push_back(Filtered_edge(row[k], col[k], data[k])); /* Start collapser */ auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - return generate_sparse_matrix(vec_triples, dm.size()); + return gen_coo_matrix(vec_triples); }, - "dense_matrix"_a, + "row"_a, "column"_a, "data"_a, "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); - m.def("flag_complex_collapse_edges_from_dense_to_coo_data", + m.def("flag_complex_collapse_edges", [](Distance_matrix& dm) { Filtered_edge_list graph; @@ -141,10 +134,9 @@ PYBIND11_MODULE(gtda_collapser, m) { auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - return coo_data_from_filtered_edge_list(vec_triples); + return gen_coo_matrix(vec_triples); }, "dense_matrix"_a, "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); - } From ce7f5137b3e7746b4ae4bf59dc3eadfc8da62dab Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 31 Aug 2020 13:55:47 +0200 Subject: [PATCH 09/27] Add test for the collapser bindings Signed-off-by: julian --- gtda/externals/python/tests/test_collapser.py | 55 +++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 gtda/externals/python/tests/test_collapser.py diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py new file mode 100644 index 000000000..14cf1f915 --- /dev/null +++ b/gtda/externals/python/tests/test_collapser.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +""" Test comes from +https://github.com/GUDHI/gudhi-devel/blob/master/src/Collapse/example/edge_collapse_basic_example.cpp +""" + + +import numpy as np +import scipy +from scipy.sparse import coo_matrix, csr_matrix +from gtda.externals.modules.gtda_collapser import flag_complex_collapse_edges + + +X = np.array([[0, 1, 1.], + [1, 2, 1.], + [2, 3, 1.], + [3, 0, 1.], + [0, 2, 2.], + [1, 3, 2.]], dtype=np.int32) +tX = np.transpose(X) + + +def check_collapse(collapsed, removed): + coo = collapsed.tocoo() + cooT = np.array([coo.row, coo.col, coo.data]).transpose() + for elem in removed: + if (cooT == elem).all(axis=1).any(): + print(elem) + return False + + return True + + +def test_simple_csr_example(): + X = csr_matrix((tX[2], (tX[0], tX[1]))) + coo_ = flag_complex_collapse_edges(X) + coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) + assert check_collapse(coo, + [[1, 3, 2]]) + + +def test_simple_coo_example(): + coo_ = flag_complex_collapse_edges( + tX[0], tX[1], tX[2]) + coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) + assert check_collapse(coo, + [[1, 3, 2]]) + + +def test_simple_dense_example(): + data = csr_matrix((tX[2], (tX[0], tX[1]))).tocoo() + coo_ = flag_complex_collapse_edges(data) + coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) + assert check_collapse(coo, + [[1, 3, 2]]) From 0365642a37093c11503bb2874b5f8cb1539b6117 Mon Sep 17 00:00:00 2001 From: julian Date: Mon, 31 Aug 2020 13:56:04 +0200 Subject: [PATCH 10/27] Update azure-pipeline ccache to allow building them again Signed-off-by: julian --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 8536b80de..1a91e56a1 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -41,7 +41,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-wheels-v2020.08.28" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-wheels-v2020.08.31" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache @@ -136,7 +136,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-v2020.08.28" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-v2020.08.31" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache From 57abf4f4bfd5cf80a8f89391bbddfa818c2459e1 Mon Sep 17 00:00:00 2001 From: julian Date: Tue, 1 Sep 2020 13:50:39 +0200 Subject: [PATCH 11/27] Update bindings names and add threshold parameter Eigen constructor allows vector of vector, this clashes with one of our input Therefore, we choose to use a different name for each input type Signed-off-by: julian --- gtda/externals/__init__.py | 7 +++- .../externals/bindings/collapser_bindings.cpp | 39 ++++++++++++------- gtda/externals/python/tests/test_collapser.py | 13 ++++--- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py index 3e800ecfb..7d0461352 100644 --- a/gtda/externals/__init__.py +++ b/gtda/externals/__init__.py @@ -3,7 +3,8 @@ from .modules.gtda_bottleneck import bottleneck_distance from .modules.gtda_wasserstein import wasserstein_distance -from .modules.gtda_collapser import flag_complex_collapse_edges +from .modules.gtda_collapser import flag_complex_collapse_edges_dense, \ + flag_complex_collapse_edges_sparse, flag_complex_collapse_edges_coo from .python import ripser, SparseRipsComplex, CechComplex, CubicalComplex, \ PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex \ @@ -18,5 +19,7 @@ 'SimplexTree', 'WitnessComplex', 'StrongWitnessComplex', - 'flag_complex_collapse_edges' + 'flag_complex_collapse_edges_dense', + 'flag_complex_collapse_edges_sparse', + 'flag_complex_collapse_edges_coo' ] diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp index b823c4b7f..7416ea478 100644 --- a/gtda/externals/bindings/collapser_bindings.cpp +++ b/gtda/externals/bindings/collapser_bindings.cpp @@ -31,6 +31,9 @@ using COO_data = std::tuple; /* Dense matrix input types */ using Distance_matrix = std::vector>; +/* constants */ +const Filtration_value filtration_max = + std::numeric_limits::infinity(); /* Generates a sparse matrix from a filtered edge list * This function is called after computing edge collapse * At the moment this function is unused because Eigen only manages @@ -79,17 +82,18 @@ PYBIND11_MODULE(gtda_collapser, m) { using namespace pybind11::literals; m.doc() = "Collapser bindings for GUDHI implementation"; - m.def("flag_complex_collapse_edges", - [](Sparse_matrix& graph_) { + m.def("flag_complex_collapse_edges_sparse", + [](Sparse_matrix& sm, Filtration_value thresh = filtration_max) { Filtered_edge_list graph; /* Convert from sparse format to Filtered_edge_list */ - int size = graph_.outerSize(); + /* Applying threshold to the input data */ + int size = sm.outerSize(); for (size_t k = 0; k < size; ++k) - for (Eigen::SparseMatrix::InnerIterator it(graph_, - k); + for (Eigen::SparseMatrix::InnerIterator it(sm, k); it; ++it) { - graph.push_back(Filtered_edge(it.row(), it.col(), it.value())); + if (it.value() <= thresh) + graph.push_back(Filtered_edge(it.row(), it.col(), it.value())); } /* Start collapser */ @@ -98,18 +102,21 @@ PYBIND11_MODULE(gtda_collapser, m) { return gen_coo_matrix(vec_triples); }, - "sparse_matrix"_a, + "sm"_a, "thresh"_a = filtration_max, "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); - m.def("flag_complex_collapse_edges", - [](Row_idx& row, Col_idx& col, Filtration_values& data) { + m.def("flag_complex_collapse_edges_coo", + [](Row_idx& row, Col_idx& col, Filtration_values& data, + Filtration_value thresh = filtration_max) { Filtered_edge_list graph; /* Convert from COO input format to Filtered_edge_list */ + /* Applying threshold to the input data */ int size = data.size(); for (size_t k = 0; k < size; ++k) - graph.push_back(Filtered_edge(row[k], col[k], data[k])); + if (data[k] <= thresh) + graph.push_back(Filtered_edge(row[k], col[k], data[k])); /* Start collapser */ auto vec_triples = @@ -117,18 +124,20 @@ PYBIND11_MODULE(gtda_collapser, m) { return gen_coo_matrix(vec_triples); }, - "row"_a, "column"_a, "data"_a, + "row"_a, "column"_a, "data"_a, "thresh"_a = filtration_max, "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); - m.def("flag_complex_collapse_edges", - [](Distance_matrix& dm) { + m.def("flag_complex_collapse_edges_dense", + [](Distance_matrix& dm, Filtration_value thresh = filtration_max) { Filtered_edge_list graph; /* Convert from dense format to Filtered edge list */ + /* Applying threshold to the input data */ for (size_t i = 0; i < dm.size(); i++) for (size_t j = 0; j < dm[i].size(); j++) - if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j])); + if (j > i && (dm[i][j] <= thresh)) + graph.push_back(Filtered_edge(i, j, dm[i][j])); /* Start collapser */ auto vec_triples = @@ -136,7 +145,7 @@ PYBIND11_MODULE(gtda_collapser, m) { return gen_coo_matrix(vec_triples); }, - "dense_matrix"_a, + "dm"_a, "thresh"_a = filtration_max, "Implicitly constructs a flag complex from edges, " "collapses edges while preserving the persistent homology"); } diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py index 14cf1f915..8b56d63a0 100644 --- a/gtda/externals/python/tests/test_collapser.py +++ b/gtda/externals/python/tests/test_collapser.py @@ -8,7 +8,10 @@ import numpy as np import scipy from scipy.sparse import coo_matrix, csr_matrix -from gtda.externals.modules.gtda_collapser import flag_complex_collapse_edges +from gtda.externals.modules.gtda_collapser import \ + flag_complex_collapse_edges_dense, \ + flag_complex_collapse_edges_sparse, \ + flag_complex_collapse_edges_coo X = np.array([[0, 1, 1.], @@ -33,14 +36,14 @@ def check_collapse(collapsed, removed): def test_simple_csr_example(): X = csr_matrix((tX[2], (tX[0], tX[1]))) - coo_ = flag_complex_collapse_edges(X) + coo_ = flag_complex_collapse_edges_sparse(X) coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) assert check_collapse(coo, [[1, 3, 2]]) def test_simple_coo_example(): - coo_ = flag_complex_collapse_edges( + coo_ = flag_complex_collapse_edges_coo( tX[0], tX[1], tX[2]) coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) assert check_collapse(coo, @@ -48,8 +51,8 @@ def test_simple_coo_example(): def test_simple_dense_example(): - data = csr_matrix((tX[2], (tX[0], tX[1]))).tocoo() - coo_ = flag_complex_collapse_edges(data) + data = csr_matrix((tX[2], (tX[0], tX[1]))).toarray() + coo_ = flag_complex_collapse_edges_dense(data) coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) assert check_collapse(coo, [[1, 3, 2]]) From 50a9948455e5deffbdb99d910563603f3ae688e4 Mon Sep 17 00:00:00 2001 From: julian Date: Tue, 1 Sep 2020 13:51:05 +0200 Subject: [PATCH 12/27] Remove unused function from bindings Signed-off-by: julian --- .../externals/bindings/collapser_bindings.cpp | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp index 7416ea478..90a542fe7 100644 --- a/gtda/externals/bindings/collapser_bindings.cpp +++ b/gtda/externals/bindings/collapser_bindings.cpp @@ -34,27 +34,6 @@ using Distance_matrix = std::vector>; /* constants */ const Filtration_value filtration_max = std::numeric_limits::infinity(); -/* Generates a sparse matrix from a filtered edge list - * This function is called after computing edge collapse - * At the moment this function is unused because Eigen only manages - * CSR sparse format, but in the case of giotto-tda we need COO format - */ -static Sparse_matrix generate_sparse_matrix(Filtered_edge_list& collapsed_edges, - size_t size) { - std::vector triplets; - /* Create triplets to efficiently create a return matrix */ - for (auto& t : collapsed_edges) { - triplets.push_back( - triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t))); - std::cout << std::get<0>(t) << ", " << std::get<1>(t) << " : " - << std::get<2>(t) << "\n"; - } - - Sparse_matrix mat(size, size); - mat.setFromTriplets(triplets.begin(), triplets.end()); - - return mat; -} /* Generates COO sparse matrix data from a filtered edge list * This function is called after computing edge collapse From c375597088ea97bc10018bf2145634d933b6e195 Mon Sep 17 00:00:00 2001 From: julian Date: Tue, 1 Sep 2020 14:30:43 +0200 Subject: [PATCH 13/27] Refactor ripser_interface Signed-off-by: julian --- gtda/externals/python/ripser_interface.py | 74 +++++++++-------------- 1 file changed, 28 insertions(+), 46 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index 674cb0de8..b9ffd8304 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -213,29 +213,30 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", # births dm = sparse.coo_matrix(dm) - if sparse.issparse(dm): - if sparse.isspmatrix_coo(dm) and not collapse_edges: - # If the matrix is already COO, we need to order the row and column - # indices lexicographically to avoid errors. See - # https://github.com/scikit-tda/ripser.py/issues/103 - row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data) - else: - # Lexicographic sorting is performed by scipy upon conversion - coo = dm.tocoo() - row, col, data = coo.row, coo.col, coo.data + if sparse.issparse(dm) or collapse_edges: if collapse_edges: - # Threshold first. Ideally, this would be implemented as a - # modification of the C++ code - if thresh < np.inf: - thresholded_idx = np.flatnonzero(data <= thresh) - data = data[thresholded_idx] - row = row[thresholded_idx] - col = col[thresholded_idx] - row, col, data = \ - gtda_collapser.flag_complex_collapse_edges_from_coo_data_to_coo_data(row, col, data) - row, col, data = _lexsort_coo_data( - np.asarray(row), np.asarray(col), np.asarray(data) - ) + if not sparse.issparse(dm): + row, col, data = \ + gtda_collapser.flag_complex_collapse_edges_dense(dm) + else: + coo = dm.tocoo() + row, col, data = \ + gtda_collapser.flag_complex_collapse_edges_coo(coo.row, + coo.col, + coo.data) + else: + if sparse.isspmatrix_coo(dm): + # If the matrix is already COO, we need to order the row and + # column indices lexicographically to avoid errors. See + # https://github.com/scikit-tda/ripser.py/issues/103 + row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data) + else: + coo = dm.tocoo() + row, col, data = coo.row, coo.col, coo.data + + row, col, data = _lexsort_coo_data(np.asarray(row), + np.asarray(col), + np.asarray(data)) res = DRFDMSparse( row.astype(dtype=np.int32, order="C"), @@ -247,30 +248,11 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", coeff ) else: - if collapse_edges: - # Threshold first. Ideally, this would be implemented as a - # modification of the C++ code - if thresh < np.inf: - dm = np.where(dm <= thresh, dm, np.inf) - row, col, data = \ - gtda_collapser.flag_complex_collapse_edges_from_dense_to_coo_data(dm) - row, col, data = \ - _lexsort_coo_data(np.asarray(row), np.asarray(col), - np.asarray(data)) - res = DRFDMSparse( - row.astype(dtype=np.int32, order="C"), - col.astype(dtype=np.int32, order="C"), - np.array(data, dtype=np.float32, order="C"), - n_points, - maxdim, - thresh, - coeff - ) - else: - # Only consider upper diagonal - I, J = np.meshgrid(np.arange(n_points), np.arange(n_points)) - DParam = np.array(dm[I > J], dtype=np.float32) - res = DRFDM(DParam, maxdim, thresh, coeff) + # Only consider upper diagonal + I, J = np.meshgrid(np.arange(n_points), np.arange(n_points)) + DParam = np.array(dm[I > J], dtype=np.float32) + print(DParam) + res = DRFDM(DParam, maxdim, thresh, coeff) # Unwrap persistence diagrams dgms = res["births_and_deaths_by_dim"] From ead89ed9eb0e790ddccb30e7e51299664fccf1fd Mon Sep 17 00:00:00 2001 From: julian Date: Tue, 1 Sep 2020 14:47:10 +0200 Subject: [PATCH 14/27] Add test cases for ripser bindings with collapser Signed-off-by: julian --- gtda/externals/python/tests/test_ripser.py | 77 ++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 gtda/externals/python/tests/test_ripser.py diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py new file mode 100644 index 000000000..cd11a2a52 --- /dev/null +++ b/gtda/externals/python/tests/test_ripser.py @@ -0,0 +1,77 @@ +import numpy as np +import scipy as sp +from .. import ripser + +X = np.random.random((50, 50)) +# Without this line, there might be different results because GUDHI +# assumes zeros in the diagonal +np.fill_diagonal(X, 0) +maxdim = 2 + + +def test_with_collapser(): + diags_collapsed = ripser( + X, + metric='precomputed', + maxdim=maxdim, + collapse_edges=True)['dgms'] + diags_not_collapsed = ripser( + X, + metric='precomputed', + maxdim=maxdim, + collapse_edges=False)['dgms'] + + for i in range(maxdim): + assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i]) + + +def test_with_collapser_with_tresh(): + thresh = 0.1 + diags_collapsed_thresh = ripser( + X, + metric='precomputed', + maxdim=maxdim, + thresh=thresh, + collapse_edges=True)['dgms'] + diags_not_collapsed_thresh = ripser( + X, metric='precomputed', maxdim=maxdim, thresh=thresh, + collapse_edges=False)['dgms'] + + for i in range(maxdim): + assert np.array_equal(diags_collapsed_thresh[i], + diags_not_collapsed_thresh[i]) + + +def test_with_collapser_coo(): + diags_collapsed = ripser( + sp.sparse.coo_matrix(X), + metric='precomputed', + maxdim=maxdim, + collapse_edges=True)['dgms'] + diags_not_collapsed = ripser( + sp.sparse.coo_matrix(X), + metric='precomputed', + maxdim=maxdim, + collapse_edges=False)['dgms'] + + for i in range(maxdim): + assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i]) + + +def test_with_collapser_coo_thresh(): + thresh = 0.1 + diags_collapsed = ripser( + sp.sparse.coo_matrix(X), + metric='precomputed', + maxdim=maxdim, + thresh=thresh, + collapse_edges=True)['dgms'] + diags_not_collapsed = ripser( + sp.sparse.coo_matrix(X), + metric='precomputed', + maxdim=maxdim, + thresh=thresh, + collapse_edges=False)['dgms'] + + for i in range(maxdim): + assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i]) From ca0138622c5e2d7cc7b22628fddccd9c972b0ad5 Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Tue, 1 Sep 2020 15:33:58 +0200 Subject: [PATCH 15/27] Remove unnecessary backslash --- gtda/externals/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py index 7d0461352..b39577ddf 100644 --- a/gtda/externals/__init__.py +++ b/gtda/externals/__init__.py @@ -6,7 +6,7 @@ from .modules.gtda_collapser import flag_complex_collapse_edges_dense, \ flag_complex_collapse_edges_sparse, flag_complex_collapse_edges_coo from .python import ripser, SparseRipsComplex, CechComplex, CubicalComplex, \ - PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex \ + PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex __all__ = [ 'bottleneck_distance', From 4fb660aad5ef55501829ac7f29827e8433e18a19 Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Wed, 2 Sep 2020 09:31:44 +0200 Subject: [PATCH 16/27] Simplify code for upper diagonal extraction in dense case --- gtda/externals/python/ripser_interface.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index b9ffd8304..d03512295 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -248,10 +248,9 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", coeff ) else: - # Only consider upper diagonal - I, J = np.meshgrid(np.arange(n_points), np.arange(n_points)) - DParam = np.array(dm[I > J], dtype=np.float32) - print(DParam) + # Only consider strict upper diagonal + idx = np.triu_indices(n_points, 1) + DParam = dm[idx].astype(np.float32) res = DRFDM(DParam, maxdim, thresh, coeff) # Unwrap persistence diagrams From 533aba754da90495e948918899a740686d82a857 Mon Sep 17 00:00:00 2001 From: julian Date: Thu, 3 Sep 2020 08:07:46 +0200 Subject: [PATCH 17/27] Add Flag to indicate when to sort data before computing persistent homology Signed-off-by: julian --- gtda/externals/python/ripser_interface.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index d03512295..b667c25e0 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -214,7 +214,11 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", dm = sparse.coo_matrix(dm) if sparse.issparse(dm) or collapse_edges: + # It's not necessary to apply sort in all cases + sort_coo = False + if collapse_edges: + sort_coo = True if not sparse.issparse(dm): row, col, data = \ gtda_collapser.flag_complex_collapse_edges_dense(dm) @@ -229,14 +233,17 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", # If the matrix is already COO, we need to order the row and # column indices lexicographically to avoid errors. See # https://github.com/scikit-tda/ripser.py/issues/103 - row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data) + sort_coo = True + row, col, data = dm.row, dm.col, dm.data else: + sort_coo = False coo = dm.tocoo() row, col, data = coo.row, coo.col, coo.data - row, col, data = _lexsort_coo_data(np.asarray(row), - np.asarray(col), - np.asarray(data)) + if sort_coo: + row, col, data = _lexsort_coo_data(np.asarray(row), + np.asarray(col), + np.asarray(data)) res = DRFDMSparse( row.astype(dtype=np.int32, order="C"), From b4b9404ad49b4a1186cd78323aa641359fced2cb Mon Sep 17 00:00:00 2001 From: julian Date: Thu, 3 Sep 2020 08:32:39 +0200 Subject: [PATCH 18/27] Fix test for ripser When it's an sparse matrix, it should be upper/lower triangular or mirrored on the diagonal => i,j == j,i Signed-off-by: julian --- gtda/externals/python/tests/test_ripser.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py index cd11a2a52..484bd5e29 100644 --- a/gtda/externals/python/tests/test_ripser.py +++ b/gtda/externals/python/tests/test_ripser.py @@ -43,13 +43,14 @@ def test_with_collapser_with_tresh(): def test_with_collapser_coo(): + X_tri = np.triu(X, 1) diags_collapsed = ripser( - sp.sparse.coo_matrix(X), + sp.sparse.coo_matrix(X_tri), metric='precomputed', maxdim=maxdim, collapse_edges=True)['dgms'] diags_not_collapsed = ripser( - sp.sparse.coo_matrix(X), + sp.sparse.coo_matrix(X_tri), metric='precomputed', maxdim=maxdim, collapse_edges=False)['dgms'] @@ -60,14 +61,15 @@ def test_with_collapser_coo(): def test_with_collapser_coo_thresh(): thresh = 0.1 + X_tri = np.triu(X, 1) diags_collapsed = ripser( - sp.sparse.coo_matrix(X), + sp.sparse.coo_matrix(X_tri), metric='precomputed', maxdim=maxdim, thresh=thresh, collapse_edges=True)['dgms'] diags_not_collapsed = ripser( - sp.sparse.coo_matrix(X), + sp.sparse.coo_matrix(X_tri), metric='precomputed', maxdim=maxdim, thresh=thresh, From f35c82057b6d578ef091c13ac9c7aaef0654360c Mon Sep 17 00:00:00 2001 From: julian Date: Thu, 3 Sep 2020 08:33:43 +0200 Subject: [PATCH 19/27] Add threshold directly into the collapser Signed-off-by: julian --- gtda/externals/python/ripser_interface.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index b667c25e0..17fce1651 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -221,13 +221,15 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", sort_coo = True if not sparse.issparse(dm): row, col, data = \ - gtda_collapser.flag_complex_collapse_edges_dense(dm) + gtda_collapser.flag_complex_collapse_edges_dense(dm, + thresh) else: coo = dm.tocoo() row, col, data = \ gtda_collapser.flag_complex_collapse_edges_coo(coo.row, coo.col, - coo.data) + coo.data, + thresh) else: if sparse.isspmatrix_coo(dm): # If the matrix is already COO, we need to order the row and From 071b1685e5989237cd95537e402ea114465aaec0 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Mon, 7 Sep 2020 15:32:45 +0200 Subject: [PATCH 20/27] Add references to edge collapse paper --- gtda/externals/python/ripser_interface.py | 63 +++++++++++++++++------ 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index 17fce1651..8854e9219 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -112,43 +112,51 @@ def get_greedy_perm(X, n_perm=None, metric="euclidean"): def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", n_perm=None, collapse_edges=False): - """Compute persistence diagrams for X data array. If X is not a distance - matrix, it will be converted to a distance matrix using the chosen metric. + """Compute persistence diagrams for X data array using Ripser [1]_. + + If X is not a distance matrix, it will be converted to a distance matrix + using the chosen metric. Parameters ---------- - X: ndarray (n_samples, n_features) + X : ndarray of shape (n_samples, n_features) A numpy array of either data or distance matrix. Can also be a sparse distance matrix of type scipy.sparse - maxdim: int, optional, default 1 + maxdim : int, optional, default: ``1`` Maximum homology dimension computed. Will compute all dimensions lower than and equal to this value. For 1, H_0 and H_1 will be computed. - thresh: float, default ``numpy.inf`` + thresh : float, optional, default: ``numpy.inf`` Maximum distances considered when constructing filtration. If ``numpy.inf``, compute the entire filtration. - coeff: int prime, default 2 + coeff : int prime, optional, default: ``2`` Compute homology with coefficients in the prime field Z/pZ for p=coeff. - metric: string or callable + metric : string or callable, optional, default: ``'euclidean'`` The metric to use when calculating distance between instances in a - feature array. If metric is a string, it must be one of the options - specified in pairwise_distances, including "euclidean", "manhattan", or - "cosine". Alternatively, if metric is a callable function, it is called - on each pair of instances (rows) and the resulting value recorded. The - callable should take two arrays from X as input and return a value - indicating the distance between them. - - n_perm: int + feature array. If set to ``"precomputed"``, input data is interpreted + as a distance matrix or of adjacency matrices of a weighted undirected + graph. If a string, it must be one of the options allowed by + :func:`scipy.spatial.distance.pdist` for its metric parameter, or a + or a metric listed in + :obj:`sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`, including + ``'euclidean'``, ``'manhattan'`` or ``'cosine'``. If a callable, it + should take pairs of vectors (1D arrays) as input and, for each two + vectors in a pair, it should return a scalar indicating the + distance/dissimilarity between them. + + n_perm : int or None, optional, default: ``None`` The number of points to subsample in a "greedy permutation", or a furthest point sampling of the points. These points will be used in lieu of the full point cloud for a faster computation, at the expense of some accuracy, which can be bounded as a maximum bottleneck distance to all diagrams on the original point set. - collapse_edges : TODO + collapse_edges : bool, optional, default: ``False`` + Whether to use the edge collapse algorithm as described in [2]_ prior + to calling ``ripser``. Returns ------- @@ -174,6 +182,29 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", If n_perm <= 0, then the full point cloud was used and this is 0 } + References + ---------- + [1] U. Bauer, "Ripser: efficient computation of Vietoris–Rips persistence \ + barcodes", 2019; `arXiv:1908.02518 \ + `_. + + [2] J.-D. Boissonnat and S. Pritam, "Edge Collapse and Persistence of \ + Flag Complexes"; in *36th International Symposium on Computational \ + Geometry (SoCG 2020)*, pp. 19:1–19:15, Schloss + Dagstuhl-Leibniz–Zentrum für Informatik, 2020; + `DOI: 10.4230/LIPIcs.SoCG.2020.19 \ + `_. + + Notes + ----- + `Ripser `_ is used as a C++ backend + for computing Vietoris–Rips persistent homology. Python bindings were + modified for performance from the `ripser.py + `_ package. + + `GUDHI `_ is used as a C++ backend + for the edge collapse algorithm described in [2]_. + """ if n_perm and sparse.issparse(X): raise Exception( From b87897e2502835063dc084ebe7f3623adc975f12 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Mon, 7 Sep 2020 15:34:54 +0200 Subject: [PATCH 21/27] Guess number of points using max of shape tuple instead of first shape Avoids having to reshape sparse arrays explicitly --- gtda/externals/python/ripser_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index 8854e9219..443d5e184 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -136,7 +136,7 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", metric : string or callable, optional, default: ``'euclidean'`` The metric to use when calculating distance between instances in a - feature array. If set to ``"precomputed"``, input data is interpreted + feature array. If set to ``'precomputed'``, input data is interpreted as a distance matrix or of adjacency matrices of a weighted undirected graph. If a string, it must be one of the options allowed by :func:`scipy.spatial.distance.pdist` for its metric parameter, or a @@ -236,7 +236,7 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", dm = pairwise_distances(X, metric=metric) dperm2all = dm - n_points = dm.shape[0] + n_points = max(dm.shape) if not sparse.issparse(dm) and np.sum(np.abs(dm.diagonal()) > 0) > 0: # If any of the diagonal elements are nonzero, # convert to sparse format, because currently From dc083ba6d25e306416b2ca4efa5f7eeba1d57415 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Mon, 7 Sep 2020 15:38:07 +0200 Subject: [PATCH 22/27] Simplify lexsort flag logic --- gtda/externals/python/ripser_interface.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index 443d5e184..35afe6118 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -269,7 +269,6 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", sort_coo = True row, col, data = dm.row, dm.col, dm.data else: - sort_coo = False coo = dm.tocoo() row, col, data = coo.row, coo.col, coo.data From fe30c82bee5ef228f995219fcc87edc97e6e1c94 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Mon, 7 Sep 2020 18:42:44 +0200 Subject: [PATCH 23/27] Small refactor of ripser interface to restrict collapse to data without non-zero vertex weights --- gtda/externals/python/ripser_interface.py | 29 ++++++++++++++--------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index 35afe6118..d61dd8de5 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -1,6 +1,9 @@ -from scipy import sparse +from warnings import warn + import numpy as np +from scipy import sparse from sklearn.metrics.pairwise import pairwise_distances + from ..modules import gtda_ripser, gtda_ripser_coeff, gtda_collapser @@ -237,17 +240,21 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", dperm2all = dm n_points = max(dm.shape) - if not sparse.issparse(dm) and np.sum(np.abs(dm.diagonal()) > 0) > 0: - # If any of the diagonal elements are nonzero, - # convert to sparse format, because currently - # that's the only format that handles nonzero - # births - dm = sparse.coo_matrix(dm) + sort_coo = True + if (dm.diagonal() != 0).any(): + if collapse_edges: + warn("Edge collapses are not supported when any of the diagonal " + "entries are non-zero. Computing persistent homology without " + "using edge collapse.") + collapse_edges = False + if not sparse.issparse(dm): + # If any of the diagonal elements are nonzero, convert to sparse + # format, because currently that's the only format that handles + # nonzero births + dm = sparse.coo_matrix(dm) + sort_coo = False if sparse.issparse(dm) or collapse_edges: - # It's not necessary to apply sort in all cases - sort_coo = False - if collapse_edges: sort_coo = True if not sparse.issparse(dm): @@ -266,11 +273,11 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", # If the matrix is already COO, we need to order the row and # column indices lexicographically to avoid errors. See # https://github.com/scikit-tda/ripser.py/issues/103 - sort_coo = True row, col, data = dm.row, dm.col, dm.data else: coo = dm.tocoo() row, col, data = coo.row, coo.col, coo.data + sort_coo = False if sort_coo: row, col, data = _lexsort_coo_data(np.asarray(row), From 701f6f9b4ab6a1e6bb5545f416f52583458b81ac Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Mon, 7 Sep 2020 18:44:18 +0200 Subject: [PATCH 24/27] Add hypothesis tests for collapser Check for barcode equalities --- gtda/externals/python/tests/test_collapser.py | 90 ++++++++++++++++--- 1 file changed, 79 insertions(+), 11 deletions(-) diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py index 8b56d63a0..7d5ac8d6c 100644 --- a/gtda/externals/python/tests/test_collapser.py +++ b/gtda/externals/python/tests/test_collapser.py @@ -4,15 +4,19 @@ https://github.com/GUDHI/gudhi-devel/blob/master/src/Collapse/example/edge_collapse_basic_example.cpp """ - import numpy as np -import scipy -from scipy.sparse import coo_matrix, csr_matrix +import pytest from gtda.externals.modules.gtda_collapser import \ flag_complex_collapse_edges_dense, \ flag_complex_collapse_edges_sparse, \ flag_complex_collapse_edges_coo +from hypothesis import given +from hypothesis.extra.numpy import arrays +from hypothesis.strategies import floats, integers, composite +from numpy.testing import assert_almost_equal +from scipy.sparse import coo_matrix, csr_matrix +from gtda.externals import ripser X = np.array([[0, 1, 1.], [1, 2, 1.], @@ -28,9 +32,7 @@ def check_collapse(collapsed, removed): cooT = np.array([coo.row, coo.col, coo.data]).transpose() for elem in removed: if (cooT == elem).all(axis=1).any(): - print(elem) return False - return True @@ -38,21 +40,87 @@ def test_simple_csr_example(): X = csr_matrix((tX[2], (tX[0], tX[1]))) coo_ = flag_complex_collapse_edges_sparse(X) coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert check_collapse(coo, - [[1, 3, 2]]) + assert check_collapse(coo, [[1, 3, 2]]) def test_simple_coo_example(): coo_ = flag_complex_collapse_edges_coo( tX[0], tX[1], tX[2]) coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert check_collapse(coo, - [[1, 3, 2]]) + assert check_collapse(coo, [[1, 3, 2]]) def test_simple_dense_example(): data = csr_matrix((tX[2], (tX[0], tX[1]))).toarray() coo_ = flag_complex_collapse_edges_dense(data) coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert check_collapse(coo, - [[1, 3, 2]]) + assert check_collapse(coo, [[1, 3, 2]]) + + +@composite +def get_dense_distance_matrices(draw): + """Generate 2d dense square arrays of floats, with zero along the + diagonal.""" + shapes = draw(integers(min_value=2, max_value=30)) + distance_matrix = draw(arrays(dtype=np.float, + elements=floats(allow_nan=False, + allow_infinity=True, + min_value=0), + shape=(shapes, shapes), unique=False)) + np.fill_diagonal(distance_matrix, 0) + return distance_matrix + + +@composite +def get_sparse_distance_matrices(draw): + """Generate 2d sparse matrices of floats, with zero along the diagonal.""" + shapes = draw(integers(min_value=2, max_value=30)) + distance_matrix = draw(arrays(dtype=np.float, + elements=floats(allow_nan=False, + allow_infinity=True, + min_value=0), + shape=(shapes, shapes), unique=False)) + distance_matrix = np.triu(distance_matrix, k=1) + distance_matrix = coo_matrix(distance_matrix) + row, col, data = \ + distance_matrix.row, distance_matrix.col, distance_matrix.data + not_inf_idx = data != np.inf + row = row[not_inf_idx] + col = col[not_inf_idx] + data = data[not_inf_idx] + shape = (np.max(row) + 1, np.max(col) + 1) if not_inf_idx.sum() else (0, 0) + distance_matrix = coo_matrix((data, (row, col)), shape=shape) + return distance_matrix + + +@pytest.mark.parametrize('thresh', [False, True]) +@given(distance_matrix=get_dense_distance_matrices()) +def test_collapse_consistent_with_no_collapse_dense(thresh, distance_matrix): + thresh = np.max(distance_matrix) / 2 if thresh else np.inf + maxdim = 3 + pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=True)['dgms'] + pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=False)['dgms'] + for i in range(maxdim + 1): + pd_collapse[i] = np.sort(pd_collapse[i], axis=0) + pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) + assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) + + +@pytest.mark.parametrize('thresh', [False, True]) +@given(distance_matrix=get_sparse_distance_matrices()) +def test_collapse_consistent_with_no_collapse_sparse(thresh, distance_matrix): + if thresh and distance_matrix.nnz: + thresh = np.max(distance_matrix) / 2 + else: + thresh = np.inf + maxdim = 3 + pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=True)['dgms'] + pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=False)['dgms'] + for i in range(maxdim + 1): + pd_collapse[i] = np.sort(pd_collapse[i], axis=0) + pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) + assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) From 0565a382eed1ea56e0ba99849a5b3b18646ba733 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Tue, 8 Sep 2020 15:37:40 +0200 Subject: [PATCH 25/27] Place hypothesis tests in test_ripser and remove superseded tests there. Add regression test for #465 --- gtda/externals/python/tests/test_collapser.py | 76 --------- gtda/externals/python/tests/test_ripser.py | 152 ++++++++++-------- 2 files changed, 85 insertions(+), 143 deletions(-) diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py index 7d5ac8d6c..19b64340c 100644 --- a/gtda/externals/python/tests/test_collapser.py +++ b/gtda/externals/python/tests/test_collapser.py @@ -5,19 +5,12 @@ """ import numpy as np -import pytest from gtda.externals.modules.gtda_collapser import \ flag_complex_collapse_edges_dense, \ flag_complex_collapse_edges_sparse, \ flag_complex_collapse_edges_coo -from hypothesis import given -from hypothesis.extra.numpy import arrays -from hypothesis.strategies import floats, integers, composite -from numpy.testing import assert_almost_equal from scipy.sparse import coo_matrix, csr_matrix -from gtda.externals import ripser - X = np.array([[0, 1, 1.], [1, 2, 1.], [2, 3, 1.], @@ -55,72 +48,3 @@ def test_simple_dense_example(): coo_ = flag_complex_collapse_edges_dense(data) coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) assert check_collapse(coo, [[1, 3, 2]]) - - -@composite -def get_dense_distance_matrices(draw): - """Generate 2d dense square arrays of floats, with zero along the - diagonal.""" - shapes = draw(integers(min_value=2, max_value=30)) - distance_matrix = draw(arrays(dtype=np.float, - elements=floats(allow_nan=False, - allow_infinity=True, - min_value=0), - shape=(shapes, shapes), unique=False)) - np.fill_diagonal(distance_matrix, 0) - return distance_matrix - - -@composite -def get_sparse_distance_matrices(draw): - """Generate 2d sparse matrices of floats, with zero along the diagonal.""" - shapes = draw(integers(min_value=2, max_value=30)) - distance_matrix = draw(arrays(dtype=np.float, - elements=floats(allow_nan=False, - allow_infinity=True, - min_value=0), - shape=(shapes, shapes), unique=False)) - distance_matrix = np.triu(distance_matrix, k=1) - distance_matrix = coo_matrix(distance_matrix) - row, col, data = \ - distance_matrix.row, distance_matrix.col, distance_matrix.data - not_inf_idx = data != np.inf - row = row[not_inf_idx] - col = col[not_inf_idx] - data = data[not_inf_idx] - shape = (np.max(row) + 1, np.max(col) + 1) if not_inf_idx.sum() else (0, 0) - distance_matrix = coo_matrix((data, (row, col)), shape=shape) - return distance_matrix - - -@pytest.mark.parametrize('thresh', [False, True]) -@given(distance_matrix=get_dense_distance_matrices()) -def test_collapse_consistent_with_no_collapse_dense(thresh, distance_matrix): - thresh = np.max(distance_matrix) / 2 if thresh else np.inf - maxdim = 3 - pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, - metric='precomputed', collapse_edges=True)['dgms'] - pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, - metric='precomputed', collapse_edges=False)['dgms'] - for i in range(maxdim + 1): - pd_collapse[i] = np.sort(pd_collapse[i], axis=0) - pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) - assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) - - -@pytest.mark.parametrize('thresh', [False, True]) -@given(distance_matrix=get_sparse_distance_matrices()) -def test_collapse_consistent_with_no_collapse_sparse(thresh, distance_matrix): - if thresh and distance_matrix.nnz: - thresh = np.max(distance_matrix) / 2 - else: - thresh = np.inf - maxdim = 3 - pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, - metric='precomputed', collapse_edges=True)['dgms'] - pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, - metric='precomputed', collapse_edges=False)['dgms'] - for i in range(maxdim + 1): - pd_collapse[i] = np.sort(pd_collapse[i], axis=0) - pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) - assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py index 484bd5e29..d47708610 100644 --- a/gtda/externals/python/tests/test_ripser.py +++ b/gtda/externals/python/tests/test_ripser.py @@ -1,79 +1,97 @@ import numpy as np -import scipy as sp -from .. import ripser +import pytest +from hypothesis import given +from hypothesis.extra.numpy import arrays +from hypothesis.strategies import floats, integers, composite +from numpy.testing import assert_almost_equal +from scipy.sparse import coo_matrix -X = np.random.random((50, 50)) -# Without this line, there might be different results because GUDHI -# assumes zeros in the diagonal -np.fill_diagonal(X, 0) -maxdim = 2 +from gtda.externals import ripser -def test_with_collapser(): - diags_collapsed = ripser( - X, - metric='precomputed', - maxdim=maxdim, - collapse_edges=True)['dgms'] - diags_not_collapsed = ripser( - X, - metric='precomputed', - maxdim=maxdim, - collapse_edges=False)['dgms'] +@composite +def get_dense_distance_matrices(draw): + """Generate 2d dense square arrays of floats, with zero along the + diagonal.""" + shapes = draw(integers(min_value=2, max_value=30)) + distance_matrix = draw(arrays(dtype=np.float, + elements=floats(allow_nan=False, + allow_infinity=True, + min_value=0), + shape=(shapes, shapes), unique=False)) + np.fill_diagonal(distance_matrix, 0) + return distance_matrix - for i in range(maxdim): - assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i]) +@composite +def get_sparse_distance_matrices(draw): + """Generate 2d sparse matrices of floats, with zero along the diagonal.""" + shapes = draw(integers(min_value=2, max_value=40)) + distance_matrix = draw(arrays(dtype=np.float, + elements=floats(allow_nan=False, + allow_infinity=True, + min_value=0), + shape=(shapes, shapes), unique=False)) + distance_matrix = np.triu(distance_matrix, k=1) + distance_matrix = coo_matrix(distance_matrix) + row, col, data = \ + distance_matrix.row, distance_matrix.col, distance_matrix.data + not_inf_idx = data != np.inf + row = row[not_inf_idx] + col = col[not_inf_idx] + data = data[not_inf_idx] + shape = (np.max(row) + 1, np.max(col) + 1) if not_inf_idx.any() else (0, 0) + distance_matrix = coo_matrix((data, (row, col)), shape=shape) + return distance_matrix -def test_with_collapser_with_tresh(): - thresh = 0.1 - diags_collapsed_thresh = ripser( - X, - metric='precomputed', - maxdim=maxdim, - thresh=thresh, - collapse_edges=True)['dgms'] - diags_not_collapsed_thresh = ripser( - X, metric='precomputed', maxdim=maxdim, thresh=thresh, - collapse_edges=False)['dgms'] - for i in range(maxdim): - assert np.array_equal(diags_collapsed_thresh[i], - diags_not_collapsed_thresh[i]) +@pytest.mark.parametrize('thresh', [False, True]) +@given(distance_matrix=get_dense_distance_matrices()) +def test_collapse_consistent_with_no_collapse_dense(thresh, distance_matrix): + thresh = np.max(distance_matrix) / 2 if thresh else np.inf + maxdim = 3 + pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=True)['dgms'] + pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=False)['dgms'] + for i in range(maxdim + 1): + pd_collapse[i] = np.sort(pd_collapse[i], axis=0) + pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) + assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) -def test_with_collapser_coo(): - X_tri = np.triu(X, 1) - diags_collapsed = ripser( - sp.sparse.coo_matrix(X_tri), - metric='precomputed', - maxdim=maxdim, - collapse_edges=True)['dgms'] - diags_not_collapsed = ripser( - sp.sparse.coo_matrix(X_tri), - metric='precomputed', - maxdim=maxdim, - collapse_edges=False)['dgms'] +@pytest.mark.parametrize('thresh', [False, True]) +@given(distance_matrix=get_sparse_distance_matrices()) +def test_collapse_consistent_with_no_collapse_coo(thresh, distance_matrix): + if thresh and distance_matrix.nnz: + thresh = np.max(distance_matrix) / 2 + else: + thresh = np.inf + maxdim = 3 + pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=True)['dgms'] + pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim, + metric='precomputed', collapse_edges=False)['dgms'] + for i in range(maxdim + 1): + pd_collapse[i] = np.sort(pd_collapse[i], axis=0) + pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) + assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) - for i in range(maxdim): - assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i]) - -def test_with_collapser_coo_thresh(): - thresh = 0.1 - X_tri = np.triu(X, 1) - diags_collapsed = ripser( - sp.sparse.coo_matrix(X_tri), - metric='precomputed', - maxdim=maxdim, - thresh=thresh, - collapse_edges=True)['dgms'] - diags_not_collapsed = ripser( - sp.sparse.coo_matrix(X_tri), - metric='precomputed', - maxdim=maxdim, - thresh=thresh, - collapse_edges=False)['dgms'] - - for i in range(maxdim): - assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i]) +def test_coo_results_independent_of_order(): + """Regression test for PR #465""" + data = np.array([6., 8., 2., 4., 5., 9., 10., 3., 1., 1.]) + row = np.array([0, 0, 0, 0, 1, 1, 1, 2, 2, 3]) + col = np.array([4, 1, 3, 2, 4, 3, 2, 3, 4, 4]) + dm = coo_matrix((data, (row, col))) + diagrams = ripser(dm, metric="precomputed")['dgms'] + diagrams_csr = ripser(dm.tocsr(), metric="precomputed")['dgms'] + expected = [np.array([[0., 1.], + [0., 1.], + [0., 2.], + [0., 5.], + [0., np.inf]]), + np.array([], shape=(0, 2), dtype=np.float64)] + for i in range(2): + assert np.array_equal(diagrams[i], expected[i]) + assert np.array_equal(diagrams_csr[i], expected[i]) From 4f91a1b20332adfdb9af1d97d29fbddb5292bf29 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Tue, 8 Sep 2020 15:56:02 +0200 Subject: [PATCH 26/27] Fix array shape --- gtda/externals/python/tests/test_ripser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py index d47708610..1b6bb4955 100644 --- a/gtda/externals/python/tests/test_ripser.py +++ b/gtda/externals/python/tests/test_ripser.py @@ -91,7 +91,7 @@ def test_coo_results_independent_of_order(): [0., 2.], [0., 5.], [0., np.inf]]), - np.array([], shape=(0, 2), dtype=np.float64)] + np.array([], dtype=np.float64).reshape(0, 2)] for i in range(2): assert np.array_equal(diagrams[i], expected[i]) assert np.array_equal(diagrams_csr[i], expected[i]) From 2e9e574411e4e20262f0359b61551205da4d0e29 Mon Sep 17 00:00:00 2001 From: julian Date: Tue, 8 Sep 2020 16:03:34 +0200 Subject: [PATCH 27/27] Disable computation of cocycles by default Signed-off-by: julian --- gtda/externals/python/ripser_interface.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py index d61dd8de5..077b277cb 100644 --- a/gtda/externals/python/ripser_interface.py +++ b/gtda/externals/python/ripser_interface.py @@ -14,7 +14,7 @@ def _lexsort_coo_data(row, col, data): return row, col, data -def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=1): +def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=0): if coeff == 2: ret = gtda_ripser.rips_dm(DParam, DParam.shape[0], coeff, maxHomDim, thresh, do_cocycles) @@ -27,7 +27,7 @@ def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=1): return ret_rips -def DRFDMSparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, do_cocycles=1): +def DRFDMSparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, do_cocycles=0): if coeff == 2: ret = gtda_ripser.rips_dm_sparse(I, J, V, I.size, N, coeff, maxHomDim, thresh, do_cocycles)