From 05b99fd83640307c5618090dec86d6e06f45487d Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Fri, 28 Aug 2020 15:30:30 +0200
Subject: [PATCH 01/27] [WIP] Add bindings for collapser

Currently supports Sparse and Dense representation as input
returns a sparse matrix

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 CMakeLists.txt                                | 24 +++++
 .../externals/bindings/collapser_bindings.cpp | 90 +++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 gtda/externals/bindings/collapser_bindings.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b6a514340..95bd63850 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -334,3 +334,27 @@ else()
     target_compile_options(gtda_cech_complex PUBLIC $<$<CONFIG:DEBUG>:-O2 -ggdb -D_GLIBCXX_DEBUG>)
 endif()
 
+#######################################################################
+#                              Collapser                              #
+#######################################################################
+
+pybind11_add_module(gtda_collapser "${BINDINGS_DIR}/collapser_bindings.cpp")
+
+if(OpenMP_FOUND)
+    target_link_libraries(gtda_collapser PRIVATE OpenMP::OpenMP_CXX)
+endif()
+
+target_link_libraries(gtda_collapser LINK_PUBLIC ${Boost_LIBRARIES})
+target_compile_definitions(gtda_collapser PRIVATE BOOST_RESULT_OF_USE_DECLTYPE=1 BOOST_ALL_NO_LIB=1 BOOST_SYSTEM_NO_DEPRECATED=1)
+
+target_include_directories(gtda_collapser PRIVATE "${GUDHI_SRC_DIR}/common/include")
+target_include_directories(gtda_collapser PRIVATE "${GUDHI_SRC_DIR}/Collapse/include")
+target_include_directories(gtda_collapser PRIVATE "${EIGEN_DIR}")
+
+if(MSVC)
+    target_compile_options(gtda_collapser PUBLIC $<$<CONFIG:RELEASE>: /O2 /Wall /fp:strict>)
+    target_compile_options(gtda_collapser PUBLIC $<$<CONFIG:DEBUG>:/O1 /DEBUG:FULL /Zi /Zo>)
+else()
+    target_compile_options(gtda_collapser PUBLIC $<$<CONFIG:RELEASE>: -Ofast -shared -pthread -fPIC -fwrapv -Wall -fno-strict-aliasing -frounding-math>)
+    target_compile_options(gtda_collapser PUBLIC $<$<CONFIG:DEBUG>:-O2 -ggdb -D_GLIBCXX_DEBUG>)
+endif()
diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp
new file mode 100644
index 000000000..29b5d5691
--- /dev/null
+++ b/gtda/externals/bindings/collapser_bindings.cpp
@@ -0,0 +1,90 @@
+/******************************************************************************
+ * Description:      gudhi's collapser interfacing with pybind11
+ * License:          AGPL3
+ *****************************************************************************/
+
+#include <gudhi/Flag_complex_edge_collapser.h>
+
+#include <pybind11/eigen.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+namespace py = pybind11;
+
+/* GUDHI Collapser require types */
+using Filtration_value = float;
+using Vertex_handle = uint32_t;
+using Filtered_edge =
+    std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
+using Filtered_edge_list = std::vector<Filtered_edge>;
+
+/* sparse matrix input types */
+using Sparse_matrix = Eigen::SparseMatrix<Filtration_value>;
+using triplet_vec = Eigen::Triplet<Filtration_value>;
+
+/* dense matrix input types */
+using Distance_matrix = std::vector<std::vector<Filtration_value>>;
+
+/* generates a sparse matrix from a filtered edges list
+ * This function is called after computing edges collapsing
+ */
+static Sparse_matrix generate_sparse_matrix(
+    Filtered_edge_list& collapse_edges) {
+  std::vector<triplet_vec> triplets;
+  /* Create triplets to efficiently create a return matrix */
+  for (auto& t : collapse_edges) {
+    triplets.push_back(
+        triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t)));
+  }
+
+  Sparse_matrix mat(collapse_edges.size(), collapse_edges.size());
+  mat.setFromTriplets(triplets.begin(), triplets.end());
+
+  return mat;
+}
+
+PYBIND11_MODULE(gtda_collapser, m) {
+  using namespace pybind11::literals;
+
+  m.doc() = "Collapser bindings for Gudhi  implementation";
+  m.def("flag_complex_collapse_edges",
+        [](Sparse_matrix& graph_) {
+          Filtered_edge_list graph;
+
+          /* Convert from Sparse format to Filtered edge list */
+          for (size_t k = 0; k < graph_.outerSize(); ++k)
+            for (Eigen::SparseMatrix<Filtration_value>::InnerIterator it(graph_,
+                                                                         k);
+                 it; ++it) {
+              graph.push_back(Filtered_edge(it.row(), it.col(), it.value()));
+            }
+
+          /* Start collapser */
+          auto vec_triples =
+              Gudhi::collapse::flag_complex_collapse_edges(graph);
+          return generate_sparse_matrix(vec_triples);
+        },
+        "sparse_matrix"_a,
+        "Implicitly constructs a flag complex from edges, "
+        "collapses edges while preserving the persistent homology");
+
+  m.def("flag_complex_collapse_edges",
+        [](Distance_matrix& dm) {
+          Filtered_edge_list graph;
+          std::vector<triplet_vec> triplets;
+
+          /* Convert from Sparse format to Filtered edge list */
+          for (size_t i = 0; i < dm.size(); i++)
+            for (size_t j = 0; j < dm[i].size(); j++)
+              if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j]));
+
+          /* Start collapser */
+          auto vec_triples =
+              Gudhi::collapse::flag_complex_collapse_edges(graph);
+
+          return generate_sparse_matrix(vec_triples);
+        },
+        "dense_matrix"_a,
+        "Implicitly constructs a flag complex from edges, "
+        "collapses edges while preserving the persistent homology");
+}

From 23dc59df59fd78e1c9c5a359418a9d057e198511 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sat, 29 Aug 2020 21:35:39 +0200
Subject: [PATCH 02/27] Attempt fix of collapser bindings

---
 gtda/externals/bindings/collapser_bindings.cpp | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp
index 29b5d5691..af87a4a60 100644
--- a/gtda/externals/bindings/collapser_bindings.cpp
+++ b/gtda/externals/bindings/collapser_bindings.cpp
@@ -29,15 +29,15 @@ using Distance_matrix = std::vector<std::vector<Filtration_value>>;
  * This function is called after computing edges collapsing
  */
 static Sparse_matrix generate_sparse_matrix(
-    Filtered_edge_list& collapse_edges) {
+    Filtered_edge_list& collapsed_edges, int size) {
   std::vector<triplet_vec> triplets;
   /* Create triplets to efficiently create a return matrix */
-  for (auto& t : collapse_edges) {
+  for (auto& t : collapsed_edges) {
     triplets.push_back(
         triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t)));
   }
 
-  Sparse_matrix mat(collapse_edges.size(), collapse_edges.size());
+  Sparse_matrix mat(size, size);
   mat.setFromTriplets(triplets.begin(), triplets.end());
 
   return mat;
@@ -52,7 +52,8 @@ PYBIND11_MODULE(gtda_collapser, m) {
           Filtered_edge_list graph;
 
           /* Convert from Sparse format to Filtered edge list */
-          for (size_t k = 0; k < graph_.outerSize(); ++k)
+          int size = graph_.outerSize();
+          for (size_t k = 0; k < size; ++k)
             for (Eigen::SparseMatrix<Filtration_value>::InnerIterator it(graph_,
                                                                          k);
                  it; ++it) {
@@ -62,7 +63,7 @@ PYBIND11_MODULE(gtda_collapser, m) {
           /* Start collapser */
           auto vec_triples =
               Gudhi::collapse::flag_complex_collapse_edges(graph);
-          return generate_sparse_matrix(vec_triples);
+          return generate_sparse_matrix(vec_triples, size);
         },
         "sparse_matrix"_a,
         "Implicitly constructs a flag complex from edges, "
@@ -82,7 +83,7 @@ PYBIND11_MODULE(gtda_collapser, m) {
           auto vec_triples =
               Gudhi::collapse::flag_complex_collapse_edges(graph);
 
-          return generate_sparse_matrix(vec_triples);
+          return generate_sparse_matrix(vec_triples, dm.size());
         },
         "dense_matrix"_a,
         "Implicitly constructs a flag complex from edges, "

From 88bfbfc11a007d335147a8aeae8d578d448f7564 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sun, 30 Aug 2020 11:22:48 +0200
Subject: [PATCH 03/27] Split flag_complex_collapse_edges into a _sparse and a
 _dense version

---
 gtda/externals/bindings/collapser_bindings.cpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp
index af87a4a60..f2c8227d8 100644
--- a/gtda/externals/bindings/collapser_bindings.cpp
+++ b/gtda/externals/bindings/collapser_bindings.cpp
@@ -11,22 +11,22 @@
 
 namespace py = pybind11;
 
-/* GUDHI Collapser require types */
+/* GUDHI Collapser required types */
 using Filtration_value = float;
 using Vertex_handle = uint32_t;
 using Filtered_edge =
     std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
 using Filtered_edge_list = std::vector<Filtered_edge>;
 
-/* sparse matrix input types */
+/* Sparse matrix input types */
 using Sparse_matrix = Eigen::SparseMatrix<Filtration_value>;
 using triplet_vec = Eigen::Triplet<Filtration_value>;
 
-/* dense matrix input types */
+/* Dense matrix input types */
 using Distance_matrix = std::vector<std::vector<Filtration_value>>;
 
-/* generates a sparse matrix from a filtered edges list
- * This function is called after computing edges collapsing
+/* Generates a sparse matrix from a filtered edges list
+ * This function is called after computing edge collapse
  */
 static Sparse_matrix generate_sparse_matrix(
     Filtered_edge_list& collapsed_edges, int size) {
@@ -47,11 +47,11 @@ PYBIND11_MODULE(gtda_collapser, m) {
   using namespace pybind11::literals;
 
   m.doc() = "Collapser bindings for Gudhi  implementation";
-  m.def("flag_complex_collapse_edges",
+  m.def("flag_complex_collapse_edges_sparse",
         [](Sparse_matrix& graph_) {
           Filtered_edge_list graph;
 
-          /* Convert from Sparse format to Filtered edge list */
+          /* Convert from sparse format to Filtered_edge_list */
           int size = graph_.outerSize();
           for (size_t k = 0; k < size; ++k)
             for (Eigen::SparseMatrix<Filtration_value>::InnerIterator it(graph_,
@@ -69,12 +69,12 @@ PYBIND11_MODULE(gtda_collapser, m) {
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
 
-  m.def("flag_complex_collapse_edges",
+  m.def("flag_complex_collapse_edges_dense",
         [](Distance_matrix& dm) {
           Filtered_edge_list graph;
           std::vector<triplet_vec> triplets;
 
-          /* Convert from Sparse format to Filtered edge list */
+          /* Convert from dense format to Filtered edge list */
           for (size_t i = 0; i < dm.size(); i++)
             for (size_t j = 0; j < dm[i].size(); j++)
               if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j]));

From 53f9c0ed8a972c343a9d12b5c520dd3cb022ce82 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sun, 30 Aug 2020 11:25:13 +0200
Subject: [PATCH 04/27] Clean Cech test docstring

---
 gtda/externals/python/tests/test_cech_complex.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/gtda/externals/python/tests/test_cech_complex.py b/gtda/externals/python/tests/test_cech_complex.py
index ae9534d8e..a81ce1aa4 100644
--- a/gtda/externals/python/tests/test_cech_complex.py
+++ b/gtda/externals/python/tests/test_cech_complex.py
@@ -1,9 +1,5 @@
 from .. import CechComplex
 
-""" Test comes from
-
-"""
-
 
 def test_minimal_cech():
     points = [[1, 2]]

From 6141e69ccce30e04f13331051ca54845d88d7105 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sun, 30 Aug 2020 13:39:17 +0200
Subject: [PATCH 05/27] Linting in ripser_interface

---
 gtda/externals/python/ripser_interface.py | 36 +++++++++++------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index 92904699c..b8bbabf1e 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -111,17 +111,16 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
     Parameters
     ----------
     X: ndarray (n_samples, n_features)
-        A numpy array of either data or distance matrix.
-        Can also be a sparse distance matrix of type scipy.sparse
+        A numpy array of either data or distance matrix. Can also be a sparse
+        distance matrix of type scipy.sparse
 
     maxdim: int, optional, default 1
-        Maximum homology dimension computed. Will compute all dimensions
-        lower than and equal to this value.
-        For 1, H_0 and H_1 will be computed.
+        Maximum homology dimension computed. Will compute all dimensions lower
+        than and equal to this value. For 1, H_0 and H_1 will be computed.
 
-    thresh: float, default infinity
-        Maximum distances considered when constructing filtration.
-        If infinity, compute the entire filtration.
+    thresh: float, default ``numpy.inf``
+        Maximum distances considered when constructing filtration. If
+        ``numpy.inf``, compute the entire filtration.
 
     coeff: int prime, default 2
         Compute homology with coefficients in the prime field Z/pZ for p=coeff.
@@ -129,19 +128,18 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
     metric: string or callable
         The metric to use when calculating distance between instances in a
         feature array. If metric is a string, it must be one of the options
-        specified in pairwise_distances, including "euclidean", "manhattan",
-        or "cosine". Alternatively, if metric is a callable function, it is
-        called on each pair of instances (rows) and the resulting value
-        recorded. The callable should take two arrays from X as input and
-        return a value indicating the distance between them.
+        specified in pairwise_distances, including "euclidean", "manhattan", or
+        "cosine". Alternatively, if metric is a callable function, it is called
+        on each pair of instances (rows) and the resulting value recorded. The
+        callable should take two arrays from X as input and return a value
+        indicating the distance between them.
 
     n_perm: int
-        The number of points to subsample in a "greedy permutation,"
-        or a furthest point sampling of the points.  These points
-        will be used in lieu of the full point cloud for a faster
-        computation, at the expense of some accuracy, which can
-        be bounded as a maximum bottleneck distance to all diagrams
-        on the original point set
+        The number of points to subsample in a "greedy permutation", or a
+        furthest point sampling of the points. These points will be used in
+        lieu of the full point cloud for a faster computation, at the expense
+        of some accuracy, which can be bounded as a maximum bottleneck distance
+        to all diagrams on the original point set.
 
     Returns
     -------

From 085d9b95723aa3cdb486178aa6afe0c2440898f4 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sun, 30 Aug 2020 21:51:14 +0200
Subject: [PATCH 06/27] Make more collapser bindings which compile

---
 .../externals/bindings/collapser_bindings.cpp | 65 ++++++++++++++++++-
 1 file changed, 62 insertions(+), 3 deletions(-)

diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp
index f2c8227d8..92d5ee9d3 100644
--- a/gtda/externals/bindings/collapser_bindings.cpp
+++ b/gtda/externals/bindings/collapser_bindings.cpp
@@ -22,10 +22,16 @@ using Filtered_edge_list = std::vector<Filtered_edge>;
 using Sparse_matrix = Eigen::SparseMatrix<Filtration_value>;
 using triplet_vec = Eigen::Triplet<Filtration_value>;
 
+/* COO data input types */
+using Row_idx = std::vector<uint32_t>;
+using Col_idx = std::vector<uint32_t>;
+using Filtration_values = std::vector<float>;
+using COO_data = std::tuple<Row_idx, Col_idx, Filtration_values>;
+
 /* Dense matrix input types */
 using Distance_matrix = std::vector<std::vector<Filtration_value>>;
 
-/* Generates a sparse matrix from a filtered edges list
+/* Generates a sparse matrix from a filtered edge list
  * This function is called after computing edge collapse
  */
 static Sparse_matrix generate_sparse_matrix(
@@ -43,10 +49,27 @@ static Sparse_matrix generate_sparse_matrix(
   return mat;
 }
 
+/* Generates COO sparse matrix data from a filtered edge list
+ * This function is called after computing edge collapse
+ */
+static COO_data coo_data_from_filtered_edge_list(
+    Filtered_edge_list& collapsed_edges) {
+  Row_idx row;
+  Col_idx col;
+  Filtration_values data;
+  for (auto& t : collapsed_edges) {
+    row.push_back(std::get<0>(t));
+    col.push_back(std::get<1>(t));
+    data.push_back(std::get<2>(t));
+  }
+
+  return COO_data(row, col, data);
+}
+
 PYBIND11_MODULE(gtda_collapser, m) {
   using namespace pybind11::literals;
 
-  m.doc() = "Collapser bindings for Gudhi  implementation";
+  m.doc() = "Collapser bindings for GUDHI implementation";
   m.def("flag_complex_collapse_edges_sparse",
         [](Sparse_matrix& graph_) {
           Filtered_edge_list graph;
@@ -69,10 +92,26 @@ PYBIND11_MODULE(gtda_collapser, m) {
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
 
+  m.def("flag_complex_collapse_edges_from_coo_data_to_coo_data",
+        [](Row_idx& row, Col_idx& col, Filtration_values& data) {
+          Filtered_edge_list graph;
+
+          /* Convert from COO input format to Filtered_edge_list */
+          int size = data.size();
+          for (size_t k = 0; k < size; ++k) graph.push_back(Filtered_edge(row[k], col[k], data[k]));
+
+          /* Start collapser */
+          auto vec_triples =
+              Gudhi::collapse::flag_complex_collapse_edges(graph);
+          return coo_data_from_filtered_edge_list(vec_triples);
+        },
+        "row"_a, "column"_a, "data"_a,
+        "Implicitly constructs a flag complex from edges, "
+        "collapses edges while preserving the persistent homology");
+
   m.def("flag_complex_collapse_edges_dense",
         [](Distance_matrix& dm) {
           Filtered_edge_list graph;
-          std::vector<triplet_vec> triplets;
 
           /* Convert from dense format to Filtered edge list */
           for (size_t i = 0; i < dm.size(); i++)
@@ -88,4 +127,24 @@ PYBIND11_MODULE(gtda_collapser, m) {
         "dense_matrix"_a,
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
+
+  m.def("flag_complex_collapse_edges_from_dense_to_coo_data",
+        [](Distance_matrix& dm) {
+          Filtered_edge_list graph;
+
+          /* Convert from dense format to Filtered edge list */
+          for (size_t i = 0; i < dm.size(); i++)
+            for (size_t j = 0; j < dm[i].size(); j++)
+              if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j]));
+
+          /* Start collapser */
+          auto vec_triples =
+              Gudhi::collapse::flag_complex_collapse_edges(graph);
+
+          return coo_data_from_filtered_edge_list(vec_triples);
+        },
+        "dense_matrix"_a,
+        "Implicitly constructs a flag complex from edges, "
+        "collapses edges while preserving the persistent homology");
+
 }

From 55d8c58583cd7e055b77f2c6a0be4f7c62151909 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sun, 30 Aug 2020 22:56:52 +0200
Subject: [PATCH 07/27] Add collapse_edges kwarg to ripser to enable edge
 collapser

---
 gtda/externals/python/ripser_interface.py | 65 ++++++++++++++++++-----
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index b8bbabf1e..674cb0de8 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -1,7 +1,14 @@
 from scipy import sparse
 import numpy as np
 from sklearn.metrics.pairwise import pairwise_distances
-from ..modules import gtda_ripser, gtda_ripser_coeff
+from ..modules import gtda_ripser, gtda_ripser_coeff, gtda_collapser
+
+
+def _lexsort_coo_data(row, col, data):
+    lex_sort_idx = np.lexsort((col, row))
+    row, col, data = \
+        row[lex_sort_idx], col[lex_sort_idx], data[lex_sort_idx]
+    return row, col, data
 
 
 def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=1):
@@ -100,11 +107,11 @@ def get_greedy_perm(X, n_perm=None, metric="euclidean"):
         ds = np.minimum(ds, dperm2all[-1])
     lambdas[-1] = np.max(ds)
     dperm2all = np.array(dperm2all)
-    return (idx_perm, lambdas, dperm2all)
+    return idx_perm, lambdas, dperm2all
 
 
 def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
-           n_perm=None):
+           n_perm=None, collapse_edges=False):
     """Compute persistence diagrams for X data array. If X is not a distance
     matrix, it will be converted to a distance matrix using the chosen metric.
 
@@ -141,6 +148,8 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
         of some accuracy, which can be bounded as a maximum bottleneck distance
         to all diagrams on the original point set.
 
+    collapse_edges : TODO
+
     Returns
     -------
     A dictionary holding all of the results of the computation
@@ -205,18 +214,29 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
         dm = sparse.coo_matrix(dm)
 
     if sparse.issparse(dm):
-        if sparse.isspmatrix_coo(dm):
+        if sparse.isspmatrix_coo(dm) and not collapse_edges:
             # If the matrix is already COO, we need to order the row and column
             # indices lexicographically to avoid errors. See
             # https://github.com/scikit-tda/ripser.py/issues/103
-            row, col, data = dm.row, dm.col, dm.data
-            lex_sort_idx = np.lexsort((col, row))
-            row, col, data = \
-                row[lex_sort_idx], col[lex_sort_idx], data[lex_sort_idx]
+            row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data)
         else:
             # Lexicographic sorting is performed by scipy upon conversion
             coo = dm.tocoo()
             row, col, data = coo.row, coo.col, coo.data
+        if collapse_edges:
+            # Threshold first. Ideally, this would be implemented as a
+            # modification of the C++ code
+            if thresh < np.inf:
+                thresholded_idx = np.flatnonzero(data <= thresh)
+                data = data[thresholded_idx]
+                row = row[thresholded_idx]
+                col = col[thresholded_idx]
+            row, col, data = \
+                gtda_collapser.flag_complex_collapse_edges_from_coo_data_to_coo_data(row, col, data)
+            row, col, data = _lexsort_coo_data(
+                np.asarray(row), np.asarray(col), np.asarray(data)
+                )
+
         res = DRFDMSparse(
             row.astype(dtype=np.int32, order="C"),
             col.astype(dtype=np.int32, order="C"),
@@ -224,12 +244,33 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             n_points,
             maxdim,
             thresh,
-            coeff,
+            coeff
             )
     else:
-        I, J = np.meshgrid(np.arange(n_points), np.arange(n_points))
-        DParam = np.array(dm[I > J], dtype=np.float32)
-        res = DRFDM(DParam, maxdim, thresh, coeff)
+        if collapse_edges:
+            # Threshold first. Ideally, this would be implemented as a
+            # modification of the C++ code
+            if thresh < np.inf:
+                dm = np.where(dm <= thresh, dm, np.inf)
+            row, col, data = \
+                gtda_collapser.flag_complex_collapse_edges_from_dense_to_coo_data(dm)
+            row, col, data = \
+                _lexsort_coo_data(np.asarray(row), np.asarray(col),
+                                  np.asarray(data))
+            res = DRFDMSparse(
+                row.astype(dtype=np.int32, order="C"),
+                col.astype(dtype=np.int32, order="C"),
+                np.array(data, dtype=np.float32, order="C"),
+                n_points,
+                maxdim,
+                thresh,
+                coeff
+                )
+        else:
+            # Only consider upper diagonal
+            I, J = np.meshgrid(np.arange(n_points), np.arange(n_points))
+            DParam = np.array(dm[I > J], dtype=np.float32)
+            res = DRFDM(DParam, maxdim, thresh, coeff)
 
     # Unwrap persistence diagrams
     dgms = res["births_and_deaths_by_dim"]

From 0b6663caa70574269e64e925624c84d09412556f Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Mon, 31 Aug 2020 13:53:37 +0200
Subject: [PATCH 08/27] Update collapser bindings

* All functions have the same name, supporting different input types
* All functions returns now tuple<vector, vector, vector>

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/__init__.py                    |  8 ++-
 .../externals/bindings/collapser_bindings.cpp | 60 ++++++++-----------
 2 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py
index 00bffafff..3e800ecfb 100644
--- a/gtda/externals/__init__.py
+++ b/gtda/externals/__init__.py
@@ -3,18 +3,20 @@
 
 from .modules.gtda_bottleneck import bottleneck_distance
 from .modules.gtda_wasserstein import wasserstein_distance
+from .modules.gtda_collapser import flag_complex_collapse_edges
 from .python import ripser, SparseRipsComplex, CechComplex, CubicalComplex, \
-    PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex
+    PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex \
 
 __all__ = [
     'bottleneck_distance',
     'wasserstein_distance',
     'ripser',
     'SparseRipsComplex',
-    'CechComplex', 
+    'CechComplex',
     'CubicalComplex',
     'PeriodicCubicalComplex',
     'SimplexTree',
     'WitnessComplex',
-    'StrongWitnessComplex'
+    'StrongWitnessComplex',
+    'flag_complex_collapse_edges'
     ]
diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp
index 92d5ee9d3..b823c4b7f 100644
--- a/gtda/externals/bindings/collapser_bindings.cpp
+++ b/gtda/externals/bindings/collapser_bindings.cpp
@@ -23,9 +23,9 @@ using Sparse_matrix = Eigen::SparseMatrix<Filtration_value>;
 using triplet_vec = Eigen::Triplet<Filtration_value>;
 
 /* COO data input types */
-using Row_idx = std::vector<uint32_t>;
-using Col_idx = std::vector<uint32_t>;
-using Filtration_values = std::vector<float>;
+using Row_idx = std::vector<Vertex_handle>;
+using Col_idx = std::vector<Vertex_handle>;
+using Filtration_values = std::vector<Filtration_value>;
 using COO_data = std::tuple<Row_idx, Col_idx, Filtration_values>;
 
 /* Dense matrix input types */
@@ -33,14 +33,18 @@ using Distance_matrix = std::vector<std::vector<Filtration_value>>;
 
 /* Generates a sparse matrix from a filtered edge list
  * This function is called after computing edge collapse
+ * At the moment this function is unused because Eigen only manages
+ * CSR sparse format, but in the case of giotto-tda we need COO format
  */
-static Sparse_matrix generate_sparse_matrix(
-    Filtered_edge_list& collapsed_edges, int size) {
+static Sparse_matrix generate_sparse_matrix(Filtered_edge_list& collapsed_edges,
+                                            size_t size) {
   std::vector<triplet_vec> triplets;
   /* Create triplets to efficiently create a return matrix */
   for (auto& t : collapsed_edges) {
     triplets.push_back(
         triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t)));
+    std::cout << std::get<0>(t) << ", " << std::get<1>(t) << " : "
+              << std::get<2>(t) << "\n";
   }
 
   Sparse_matrix mat(size, size);
@@ -52,11 +56,16 @@ static Sparse_matrix generate_sparse_matrix(
 /* Generates COO sparse matrix data from a filtered edge list
  * This function is called after computing edge collapse
  */
-static COO_data coo_data_from_filtered_edge_list(
-    Filtered_edge_list& collapsed_edges) {
+static COO_data gen_coo_matrix(Filtered_edge_list& collapsed_edges) {
   Row_idx row;
   Col_idx col;
   Filtration_values data;
+
+  /* allocate memory beforehand */
+  row.reserve(collapsed_edges.size());
+  col.reserve(collapsed_edges.size());
+  data.reserve(collapsed_edges.size());
+
   for (auto& t : collapsed_edges) {
     row.push_back(std::get<0>(t));
     col.push_back(std::get<1>(t));
@@ -70,7 +79,7 @@ PYBIND11_MODULE(gtda_collapser, m) {
   using namespace pybind11::literals;
 
   m.doc() = "Collapser bindings for GUDHI implementation";
-  m.def("flag_complex_collapse_edges_sparse",
+  m.def("flag_complex_collapse_edges",
         [](Sparse_matrix& graph_) {
           Filtered_edge_list graph;
 
@@ -86,49 +95,33 @@ PYBIND11_MODULE(gtda_collapser, m) {
           /* Start collapser */
           auto vec_triples =
               Gudhi::collapse::flag_complex_collapse_edges(graph);
-          return generate_sparse_matrix(vec_triples, size);
+
+          return gen_coo_matrix(vec_triples);
         },
         "sparse_matrix"_a,
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
 
-  m.def("flag_complex_collapse_edges_from_coo_data_to_coo_data",
+  m.def("flag_complex_collapse_edges",
         [](Row_idx& row, Col_idx& col, Filtration_values& data) {
           Filtered_edge_list graph;
 
           /* Convert from COO input format to Filtered_edge_list */
           int size = data.size();
-          for (size_t k = 0; k < size; ++k) graph.push_back(Filtered_edge(row[k], col[k], data[k]));
-
-          /* Start collapser */
-          auto vec_triples =
-              Gudhi::collapse::flag_complex_collapse_edges(graph);
-          return coo_data_from_filtered_edge_list(vec_triples);
-        },
-        "row"_a, "column"_a, "data"_a,
-        "Implicitly constructs a flag complex from edges, "
-        "collapses edges while preserving the persistent homology");
-
-  m.def("flag_complex_collapse_edges_dense",
-        [](Distance_matrix& dm) {
-          Filtered_edge_list graph;
-
-          /* Convert from dense format to Filtered edge list */
-          for (size_t i = 0; i < dm.size(); i++)
-            for (size_t j = 0; j < dm[i].size(); j++)
-              if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j]));
+          for (size_t k = 0; k < size; ++k)
+            graph.push_back(Filtered_edge(row[k], col[k], data[k]));
 
           /* Start collapser */
           auto vec_triples =
               Gudhi::collapse::flag_complex_collapse_edges(graph);
 
-          return generate_sparse_matrix(vec_triples, dm.size());
+          return gen_coo_matrix(vec_triples);
         },
-        "dense_matrix"_a,
+        "row"_a, "column"_a, "data"_a,
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
 
-  m.def("flag_complex_collapse_edges_from_dense_to_coo_data",
+  m.def("flag_complex_collapse_edges",
         [](Distance_matrix& dm) {
           Filtered_edge_list graph;
 
@@ -141,10 +134,9 @@ PYBIND11_MODULE(gtda_collapser, m) {
           auto vec_triples =
               Gudhi::collapse::flag_complex_collapse_edges(graph);
 
-          return coo_data_from_filtered_edge_list(vec_triples);
+          return gen_coo_matrix(vec_triples);
         },
         "dense_matrix"_a,
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
-
 }

From ce7f5137b3e7746b4ae4bf59dc3eadfc8da62dab Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Mon, 31 Aug 2020 13:55:47 +0200
Subject: [PATCH 09/27] Add test for the collapser bindings

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/python/tests/test_collapser.py | 55 +++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 gtda/externals/python/tests/test_collapser.py

diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py
new file mode 100644
index 000000000..14cf1f915
--- /dev/null
+++ b/gtda/externals/python/tests/test_collapser.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+""" Test comes from
+https://github.com/GUDHI/gudhi-devel/blob/master/src/Collapse/example/edge_collapse_basic_example.cpp
+"""
+
+
+import numpy as np
+import scipy
+from scipy.sparse import coo_matrix, csr_matrix
+from gtda.externals.modules.gtda_collapser import flag_complex_collapse_edges
+
+
+X = np.array([[0, 1, 1.],
+              [1, 2, 1.],
+              [2, 3, 1.],
+              [3, 0, 1.],
+              [0, 2, 2.],
+              [1, 3, 2.]], dtype=np.int32)
+tX = np.transpose(X)
+
+
+def check_collapse(collapsed, removed):
+    coo = collapsed.tocoo()
+    cooT = np.array([coo.row, coo.col, coo.data]).transpose()
+    for elem in removed:
+        if (cooT == elem).all(axis=1).any():
+            print(elem)
+            return False
+
+    return True
+
+
+def test_simple_csr_example():
+    X = csr_matrix((tX[2], (tX[0], tX[1])))
+    coo_ = flag_complex_collapse_edges(X)
+    coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
+    assert check_collapse(coo,
+                          [[1, 3, 2]])
+
+
+def test_simple_coo_example():
+    coo_ = flag_complex_collapse_edges(
+        tX[0], tX[1], tX[2])
+    coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
+    assert check_collapse(coo,
+                          [[1, 3, 2]])
+
+
+def test_simple_dense_example():
+    data = csr_matrix((tX[2], (tX[0], tX[1]))).tocoo()
+    coo_ = flag_complex_collapse_edges(data)
+    coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
+    assert check_collapse(coo,
+                          [[1, 3, 2]])

From 0365642a37093c11503bb2874b5f8cb1539b6117 Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Mon, 31 Aug 2020 13:56:04 +0200
Subject: [PATCH 10/27] Update azure-pipeline ccache to allow building them
 again

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 azure-pipelines.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 8536b80de..1a91e56a1 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -41,7 +41,7 @@ jobs:
 
   - task: Cache@2
     inputs:
-      key: '"ccache-wheels-v2020.08.28" | $(Agent.OS) | "$(python.version)"'
+      key: '"ccache-wheels-v2020.08.31" | $(Agent.OS) | "$(python.version)"'
       path: $(CCACHE_DIR)
     displayName: ccache
 
@@ -136,7 +136,7 @@ jobs:
 
   - task: Cache@2
     inputs:
-      key: '"ccache-v2020.08.28" | $(Agent.OS) | "$(python.version)"'
+      key: '"ccache-v2020.08.31" | $(Agent.OS) | "$(python.version)"'
       path: $(CCACHE_DIR)
     displayName: ccache
 

From 57abf4f4bfd5cf80a8f89391bbddfa818c2459e1 Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Tue, 1 Sep 2020 13:50:39 +0200
Subject: [PATCH 11/27] Update bindings names and add threshold parameter

Eigen constructor allows vector of vector, this clashes with one of our input
Therefore, we choose to use a different name for each input type

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/__init__.py                    |  7 +++-
 .../externals/bindings/collapser_bindings.cpp | 39 ++++++++++++-------
 gtda/externals/python/tests/test_collapser.py | 13 ++++---
 3 files changed, 37 insertions(+), 22 deletions(-)

diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py
index 3e800ecfb..7d0461352 100644
--- a/gtda/externals/__init__.py
+++ b/gtda/externals/__init__.py
@@ -3,7 +3,8 @@
 
 from .modules.gtda_bottleneck import bottleneck_distance
 from .modules.gtda_wasserstein import wasserstein_distance
-from .modules.gtda_collapser import flag_complex_collapse_edges
+from .modules.gtda_collapser import flag_complex_collapse_edges_dense, \
+    flag_complex_collapse_edges_sparse, flag_complex_collapse_edges_coo
 from .python import ripser, SparseRipsComplex, CechComplex, CubicalComplex, \
     PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex \
 
@@ -18,5 +19,7 @@
     'SimplexTree',
     'WitnessComplex',
     'StrongWitnessComplex',
-    'flag_complex_collapse_edges'
+    'flag_complex_collapse_edges_dense',
+    'flag_complex_collapse_edges_sparse',
+    'flag_complex_collapse_edges_coo'
     ]
diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp
index b823c4b7f..7416ea478 100644
--- a/gtda/externals/bindings/collapser_bindings.cpp
+++ b/gtda/externals/bindings/collapser_bindings.cpp
@@ -31,6 +31,9 @@ using COO_data = std::tuple<Row_idx, Col_idx, Filtration_values>;
 /* Dense matrix input types */
 using Distance_matrix = std::vector<std::vector<Filtration_value>>;
 
+/* constants */
+const Filtration_value filtration_max =
+    std::numeric_limits<Filtration_value>::infinity();
 /* Generates a sparse matrix from a filtered edge list
  * This function is called after computing edge collapse
  * At the moment this function is unused because Eigen only manages
@@ -79,17 +82,18 @@ PYBIND11_MODULE(gtda_collapser, m) {
   using namespace pybind11::literals;
 
   m.doc() = "Collapser bindings for GUDHI implementation";
-  m.def("flag_complex_collapse_edges",
-        [](Sparse_matrix& graph_) {
+  m.def("flag_complex_collapse_edges_sparse",
+        [](Sparse_matrix& sm, Filtration_value thresh = filtration_max) {
           Filtered_edge_list graph;
 
           /* Convert from sparse format to Filtered_edge_list */
-          int size = graph_.outerSize();
+          /* Applying threshold to the input data */
+          int size = sm.outerSize();
           for (size_t k = 0; k < size; ++k)
-            for (Eigen::SparseMatrix<Filtration_value>::InnerIterator it(graph_,
-                                                                         k);
+            for (Eigen::SparseMatrix<Filtration_value>::InnerIterator it(sm, k);
                  it; ++it) {
-              graph.push_back(Filtered_edge(it.row(), it.col(), it.value()));
+              if (it.value() <= thresh)
+                graph.push_back(Filtered_edge(it.row(), it.col(), it.value()));
             }
 
           /* Start collapser */
@@ -98,18 +102,21 @@ PYBIND11_MODULE(gtda_collapser, m) {
 
           return gen_coo_matrix(vec_triples);
         },
-        "sparse_matrix"_a,
+        "sm"_a, "thresh"_a = filtration_max,
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
 
-  m.def("flag_complex_collapse_edges",
-        [](Row_idx& row, Col_idx& col, Filtration_values& data) {
+  m.def("flag_complex_collapse_edges_coo",
+        [](Row_idx& row, Col_idx& col, Filtration_values& data,
+           Filtration_value thresh = filtration_max) {
           Filtered_edge_list graph;
 
           /* Convert from COO input format to Filtered_edge_list */
+          /* Applying threshold to the input data */
           int size = data.size();
           for (size_t k = 0; k < size; ++k)
-            graph.push_back(Filtered_edge(row[k], col[k], data[k]));
+            if (data[k] <= thresh)
+              graph.push_back(Filtered_edge(row[k], col[k], data[k]));
 
           /* Start collapser */
           auto vec_triples =
@@ -117,18 +124,20 @@ PYBIND11_MODULE(gtda_collapser, m) {
 
           return gen_coo_matrix(vec_triples);
         },
-        "row"_a, "column"_a, "data"_a,
+        "row"_a, "column"_a, "data"_a, "thresh"_a = filtration_max,
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
 
-  m.def("flag_complex_collapse_edges",
-        [](Distance_matrix& dm) {
+  m.def("flag_complex_collapse_edges_dense",
+        [](Distance_matrix& dm, Filtration_value thresh = filtration_max) {
           Filtered_edge_list graph;
 
           /* Convert from dense format to Filtered edge list */
+          /* Applying threshold to the input data */
           for (size_t i = 0; i < dm.size(); i++)
             for (size_t j = 0; j < dm[i].size(); j++)
-              if (j > i) graph.push_back(Filtered_edge(i, j, dm[i][j]));
+              if (j > i && (dm[i][j] <= thresh))
+                graph.push_back(Filtered_edge(i, j, dm[i][j]));
 
           /* Start collapser */
           auto vec_triples =
@@ -136,7 +145,7 @@ PYBIND11_MODULE(gtda_collapser, m) {
 
           return gen_coo_matrix(vec_triples);
         },
-        "dense_matrix"_a,
+        "dm"_a, "thresh"_a = filtration_max,
         "Implicitly constructs a flag complex from edges, "
         "collapses edges while preserving the persistent homology");
 }
diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py
index 14cf1f915..8b56d63a0 100644
--- a/gtda/externals/python/tests/test_collapser.py
+++ b/gtda/externals/python/tests/test_collapser.py
@@ -8,7 +8,10 @@
 import numpy as np
 import scipy
 from scipy.sparse import coo_matrix, csr_matrix
-from gtda.externals.modules.gtda_collapser import flag_complex_collapse_edges
+from gtda.externals.modules.gtda_collapser import \
+    flag_complex_collapse_edges_dense, \
+    flag_complex_collapse_edges_sparse, \
+    flag_complex_collapse_edges_coo
 
 
 X = np.array([[0, 1, 1.],
@@ -33,14 +36,14 @@ def check_collapse(collapsed, removed):
 
 def test_simple_csr_example():
     X = csr_matrix((tX[2], (tX[0], tX[1])))
-    coo_ = flag_complex_collapse_edges(X)
+    coo_ = flag_complex_collapse_edges_sparse(X)
     coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
     assert check_collapse(coo,
                           [[1, 3, 2]])
 
 
 def test_simple_coo_example():
-    coo_ = flag_complex_collapse_edges(
+    coo_ = flag_complex_collapse_edges_coo(
         tX[0], tX[1], tX[2])
     coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
     assert check_collapse(coo,
@@ -48,8 +51,8 @@ def test_simple_coo_example():
 
 
 def test_simple_dense_example():
-    data = csr_matrix((tX[2], (tX[0], tX[1]))).tocoo()
-    coo_ = flag_complex_collapse_edges(data)
+    data = csr_matrix((tX[2], (tX[0], tX[1]))).toarray()
+    coo_ = flag_complex_collapse_edges_dense(data)
     coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
     assert check_collapse(coo,
                           [[1, 3, 2]])

From 50a9948455e5deffbdb99d910563603f3ae688e4 Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Tue, 1 Sep 2020 13:51:05 +0200
Subject: [PATCH 12/27] Remove unused function from bindings

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 .../externals/bindings/collapser_bindings.cpp | 21 -------------------
 1 file changed, 21 deletions(-)

diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp
index 7416ea478..90a542fe7 100644
--- a/gtda/externals/bindings/collapser_bindings.cpp
+++ b/gtda/externals/bindings/collapser_bindings.cpp
@@ -34,27 +34,6 @@ using Distance_matrix = std::vector<std::vector<Filtration_value>>;
 /* constants */
 const Filtration_value filtration_max =
     std::numeric_limits<Filtration_value>::infinity();
-/* Generates a sparse matrix from a filtered edge list
- * This function is called after computing edge collapse
- * At the moment this function is unused because Eigen only manages
- * CSR sparse format, but in the case of giotto-tda we need COO format
- */
-static Sparse_matrix generate_sparse_matrix(Filtered_edge_list& collapsed_edges,
-                                            size_t size) {
-  std::vector<triplet_vec> triplets;
-  /* Create triplets to efficiently create a return matrix */
-  for (auto& t : collapsed_edges) {
-    triplets.push_back(
-        triplet_vec(std::get<0>(t), std::get<1>(t), std::get<2>(t)));
-    std::cout << std::get<0>(t) << ", " << std::get<1>(t) << " : "
-              << std::get<2>(t) << "\n";
-  }
-
-  Sparse_matrix mat(size, size);
-  mat.setFromTriplets(triplets.begin(), triplets.end());
-
-  return mat;
-}
 
 /* Generates COO sparse matrix data from a filtered edge list
  * This function is called after computing edge collapse

From c375597088ea97bc10018bf2145634d933b6e195 Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Tue, 1 Sep 2020 14:30:43 +0200
Subject: [PATCH 13/27] Refactor ripser_interface

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/python/ripser_interface.py | 74 +++++++++--------------
 1 file changed, 28 insertions(+), 46 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index 674cb0de8..b9ffd8304 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -213,29 +213,30 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
         # births
         dm = sparse.coo_matrix(dm)
 
-    if sparse.issparse(dm):
-        if sparse.isspmatrix_coo(dm) and not collapse_edges:
-            # If the matrix is already COO, we need to order the row and column
-            # indices lexicographically to avoid errors. See
-            # https://github.com/scikit-tda/ripser.py/issues/103
-            row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data)
-        else:
-            # Lexicographic sorting is performed by scipy upon conversion
-            coo = dm.tocoo()
-            row, col, data = coo.row, coo.col, coo.data
+    if sparse.issparse(dm) or collapse_edges:
         if collapse_edges:
-            # Threshold first. Ideally, this would be implemented as a
-            # modification of the C++ code
-            if thresh < np.inf:
-                thresholded_idx = np.flatnonzero(data <= thresh)
-                data = data[thresholded_idx]
-                row = row[thresholded_idx]
-                col = col[thresholded_idx]
-            row, col, data = \
-                gtda_collapser.flag_complex_collapse_edges_from_coo_data_to_coo_data(row, col, data)
-            row, col, data = _lexsort_coo_data(
-                np.asarray(row), np.asarray(col), np.asarray(data)
-                )
+            if not sparse.issparse(dm):
+                row, col, data = \
+                    gtda_collapser.flag_complex_collapse_edges_dense(dm)
+            else:
+                coo = dm.tocoo()
+                row, col, data = \
+                    gtda_collapser.flag_complex_collapse_edges_coo(coo.row,
+                                                                   coo.col,
+                                                                   coo.data)
+        else:
+            if sparse.isspmatrix_coo(dm):
+                # If the matrix is already COO, we need to order the row and
+                # column indices lexicographically to avoid errors. See
+                # https://github.com/scikit-tda/ripser.py/issues/103
+                row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data)
+            else:
+                coo = dm.tocoo()
+                row, col, data = coo.row, coo.col, coo.data
+
+        row, col, data = _lexsort_coo_data(np.asarray(row),
+                                           np.asarray(col),
+                                           np.asarray(data))
 
         res = DRFDMSparse(
             row.astype(dtype=np.int32, order="C"),
@@ -247,30 +248,11 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             coeff
             )
     else:
-        if collapse_edges:
-            # Threshold first. Ideally, this would be implemented as a
-            # modification of the C++ code
-            if thresh < np.inf:
-                dm = np.where(dm <= thresh, dm, np.inf)
-            row, col, data = \
-                gtda_collapser.flag_complex_collapse_edges_from_dense_to_coo_data(dm)
-            row, col, data = \
-                _lexsort_coo_data(np.asarray(row), np.asarray(col),
-                                  np.asarray(data))
-            res = DRFDMSparse(
-                row.astype(dtype=np.int32, order="C"),
-                col.astype(dtype=np.int32, order="C"),
-                np.array(data, dtype=np.float32, order="C"),
-                n_points,
-                maxdim,
-                thresh,
-                coeff
-                )
-        else:
-            # Only consider upper diagonal
-            I, J = np.meshgrid(np.arange(n_points), np.arange(n_points))
-            DParam = np.array(dm[I > J], dtype=np.float32)
-            res = DRFDM(DParam, maxdim, thresh, coeff)
+        # Only consider upper diagonal
+        I, J = np.meshgrid(np.arange(n_points), np.arange(n_points))
+        DParam = np.array(dm[I > J], dtype=np.float32)
+        print(DParam)
+        res = DRFDM(DParam, maxdim, thresh, coeff)
 
     # Unwrap persistence diagrams
     dgms = res["births_and_deaths_by_dim"]

From ead89ed9eb0e790ddccb30e7e51299664fccf1fd Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Tue, 1 Sep 2020 14:47:10 +0200
Subject: [PATCH 14/27] Add test cases for ripser bindings with collapser

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/python/tests/test_ripser.py | 77 ++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 gtda/externals/python/tests/test_ripser.py

diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py
new file mode 100644
index 000000000..cd11a2a52
--- /dev/null
+++ b/gtda/externals/python/tests/test_ripser.py
@@ -0,0 +1,77 @@
+import numpy as np
+import scipy as sp
+from .. import ripser
+
+X = np.random.random((50, 50))
+# Without this line, there might be different results because GUDHI
+# assumes zeros in the diagonal
+np.fill_diagonal(X, 0)
+maxdim = 2
+
+
+def test_with_collapser():
+    diags_collapsed = ripser(
+        X,
+        metric='precomputed',
+        maxdim=maxdim,
+        collapse_edges=True)['dgms']
+    diags_not_collapsed = ripser(
+        X,
+        metric='precomputed',
+        maxdim=maxdim,
+        collapse_edges=False)['dgms']
+
+    for i in range(maxdim):
+        assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i])
+
+
+def test_with_collapser_with_tresh():
+    thresh = 0.1
+    diags_collapsed_thresh = ripser(
+        X,
+        metric='precomputed',
+        maxdim=maxdim,
+        thresh=thresh,
+        collapse_edges=True)['dgms']
+    diags_not_collapsed_thresh = ripser(
+        X, metric='precomputed', maxdim=maxdim, thresh=thresh,
+        collapse_edges=False)['dgms']
+
+    for i in range(maxdim):
+        assert np.array_equal(diags_collapsed_thresh[i],
+                              diags_not_collapsed_thresh[i])
+
+
+def test_with_collapser_coo():
+    diags_collapsed = ripser(
+        sp.sparse.coo_matrix(X),
+        metric='precomputed',
+        maxdim=maxdim,
+        collapse_edges=True)['dgms']
+    diags_not_collapsed = ripser(
+        sp.sparse.coo_matrix(X),
+        metric='precomputed',
+        maxdim=maxdim,
+        collapse_edges=False)['dgms']
+
+    for i in range(maxdim):
+        assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i])
+
+
+def test_with_collapser_coo_thresh():
+    thresh = 0.1
+    diags_collapsed = ripser(
+        sp.sparse.coo_matrix(X),
+        metric='precomputed',
+        maxdim=maxdim,
+        thresh=thresh,
+        collapse_edges=True)['dgms']
+    diags_not_collapsed = ripser(
+        sp.sparse.coo_matrix(X),
+        metric='precomputed',
+        maxdim=maxdim,
+        thresh=thresh,
+        collapse_edges=False)['dgms']
+
+    for i in range(maxdim):
+        assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i])

From ca0138622c5e2d7cc7b22628fddccd9c972b0ad5 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <46537483+ulupo@users.noreply.github.com>
Date: Tue, 1 Sep 2020 15:33:58 +0200
Subject: [PATCH 15/27] Remove unnecessary backslash

---
 gtda/externals/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py
index 7d0461352..b39577ddf 100644
--- a/gtda/externals/__init__.py
+++ b/gtda/externals/__init__.py
@@ -6,7 +6,7 @@
 from .modules.gtda_collapser import flag_complex_collapse_edges_dense, \
     flag_complex_collapse_edges_sparse, flag_complex_collapse_edges_coo
 from .python import ripser, SparseRipsComplex, CechComplex, CubicalComplex, \
-    PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex \
+    PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex
 
 __all__ = [
     'bottleneck_distance',

From 4fb660aad5ef55501829ac7f29827e8433e18a19 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <46537483+ulupo@users.noreply.github.com>
Date: Wed, 2 Sep 2020 09:31:44 +0200
Subject: [PATCH 16/27] Simplify code for upper diagonal extraction in dense
 case

---
 gtda/externals/python/ripser_interface.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index b9ffd8304..d03512295 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -248,10 +248,9 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             coeff
             )
     else:
-        # Only consider upper diagonal
-        I, J = np.meshgrid(np.arange(n_points), np.arange(n_points))
-        DParam = np.array(dm[I > J], dtype=np.float32)
-        print(DParam)
+        # Only consider strict upper diagonal
+        idx = np.triu_indices(n_points, 1)
+        DParam = dm[idx].astype(np.float32)
         res = DRFDM(DParam, maxdim, thresh, coeff)
 
     # Unwrap persistence diagrams

From 533aba754da90495e948918899a740686d82a857 Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Thu, 3 Sep 2020 08:07:46 +0200
Subject: [PATCH 17/27] Add Flag to indicate when to sort data before computing
 persistent homology

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/python/ripser_interface.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index d03512295..b667c25e0 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -214,7 +214,11 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
         dm = sparse.coo_matrix(dm)
 
     if sparse.issparse(dm) or collapse_edges:
+        # It's not necessary to apply sort in all cases
+        sort_coo = False
+
         if collapse_edges:
+            sort_coo = True
             if not sparse.issparse(dm):
                 row, col, data = \
                     gtda_collapser.flag_complex_collapse_edges_dense(dm)
@@ -229,14 +233,17 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
                 # If the matrix is already COO, we need to order the row and
                 # column indices lexicographically to avoid errors. See
                 # https://github.com/scikit-tda/ripser.py/issues/103
-                row, col, data = _lexsort_coo_data(dm.row, dm.col, dm.data)
+                sort_coo = True
+                row, col, data = dm.row, dm.col, dm.data
             else:
+                sort_coo = False
                 coo = dm.tocoo()
                 row, col, data = coo.row, coo.col, coo.data
 
-        row, col, data = _lexsort_coo_data(np.asarray(row),
-                                           np.asarray(col),
-                                           np.asarray(data))
+        if sort_coo:
+            row, col, data = _lexsort_coo_data(np.asarray(row),
+                                               np.asarray(col),
+                                               np.asarray(data))
 
         res = DRFDMSparse(
             row.astype(dtype=np.int32, order="C"),

From b4b9404ad49b4a1186cd78323aa641359fced2cb Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Thu, 3 Sep 2020 08:32:39 +0200
Subject: [PATCH 18/27] Fix test for ripser

When it's an sparse matrix, it should be upper/lower triangular or
mirrored on the diagonal => i,j == j,i

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/python/tests/test_ripser.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py
index cd11a2a52..484bd5e29 100644
--- a/gtda/externals/python/tests/test_ripser.py
+++ b/gtda/externals/python/tests/test_ripser.py
@@ -43,13 +43,14 @@ def test_with_collapser_with_tresh():
 
 
 def test_with_collapser_coo():
+    X_tri = np.triu(X, 1)
     diags_collapsed = ripser(
-        sp.sparse.coo_matrix(X),
+        sp.sparse.coo_matrix(X_tri),
         metric='precomputed',
         maxdim=maxdim,
         collapse_edges=True)['dgms']
     diags_not_collapsed = ripser(
-        sp.sparse.coo_matrix(X),
+        sp.sparse.coo_matrix(X_tri),
         metric='precomputed',
         maxdim=maxdim,
         collapse_edges=False)['dgms']
@@ -60,14 +61,15 @@ def test_with_collapser_coo():
 
 def test_with_collapser_coo_thresh():
     thresh = 0.1
+    X_tri = np.triu(X, 1)
     diags_collapsed = ripser(
-        sp.sparse.coo_matrix(X),
+        sp.sparse.coo_matrix(X_tri),
         metric='precomputed',
         maxdim=maxdim,
         thresh=thresh,
         collapse_edges=True)['dgms']
     diags_not_collapsed = ripser(
-        sp.sparse.coo_matrix(X),
+        sp.sparse.coo_matrix(X_tri),
         metric='precomputed',
         maxdim=maxdim,
         thresh=thresh,

From f35c82057b6d578ef091c13ac9c7aaef0654360c Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Thu, 3 Sep 2020 08:33:43 +0200
Subject: [PATCH 19/27] Add threshold directly into the collapser

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/python/ripser_interface.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index b667c25e0..17fce1651 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -221,13 +221,15 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             sort_coo = True
             if not sparse.issparse(dm):
                 row, col, data = \
-                    gtda_collapser.flag_complex_collapse_edges_dense(dm)
+                    gtda_collapser.flag_complex_collapse_edges_dense(dm,
+                                                                     thresh)
             else:
                 coo = dm.tocoo()
                 row, col, data = \
                     gtda_collapser.flag_complex_collapse_edges_coo(coo.row,
                                                                    coo.col,
-                                                                   coo.data)
+                                                                   coo.data,
+                                                                   thresh)
         else:
             if sparse.isspmatrix_coo(dm):
                 # If the matrix is already COO, we need to order the row and

From 071b1685e5989237cd95537e402ea114465aaec0 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 7 Sep 2020 15:32:45 +0200
Subject: [PATCH 20/27] Add references to edge collapse paper

---
 gtda/externals/python/ripser_interface.py | 63 +++++++++++++++++------
 1 file changed, 47 insertions(+), 16 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index 17fce1651..8854e9219 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -112,43 +112,51 @@ def get_greedy_perm(X, n_perm=None, metric="euclidean"):
 
 def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
            n_perm=None, collapse_edges=False):
-    """Compute persistence diagrams for X data array. If X is not a distance
-    matrix, it will be converted to a distance matrix using the chosen metric.
+    """Compute persistence diagrams for X data array using Ripser [1]_.
+
+    If X is not a distance matrix, it will be converted to a distance matrix
+    using the chosen metric.
 
     Parameters
     ----------
-    X: ndarray (n_samples, n_features)
+    X : ndarray of shape (n_samples, n_features)
         A numpy array of either data or distance matrix. Can also be a sparse
         distance matrix of type scipy.sparse
 
-    maxdim: int, optional, default 1
+    maxdim : int, optional, default: ``1``
         Maximum homology dimension computed. Will compute all dimensions lower
         than and equal to this value. For 1, H_0 and H_1 will be computed.
 
-    thresh: float, default ``numpy.inf``
+    thresh : float, optional, default: ``numpy.inf``
         Maximum distances considered when constructing filtration. If
         ``numpy.inf``, compute the entire filtration.
 
-    coeff: int prime, default 2
+    coeff : int prime, optional, default: ``2``
         Compute homology with coefficients in the prime field Z/pZ for p=coeff.
 
-    metric: string or callable
+    metric : string or callable, optional, default: ``'euclidean'``
         The metric to use when calculating distance between instances in a
-        feature array. If metric is a string, it must be one of the options
-        specified in pairwise_distances, including "euclidean", "manhattan", or
-        "cosine". Alternatively, if metric is a callable function, it is called
-        on each pair of instances (rows) and the resulting value recorded. The
-        callable should take two arrays from X as input and return a value
-        indicating the distance between them.
-
-    n_perm: int
+        feature array. If set to ``"precomputed"``, input data is interpreted
+        as a distance matrix or of adjacency matrices of a weighted undirected
+        graph. If a string, it must be one of the options allowed by
+        :func:`scipy.spatial.distance.pdist` for its metric parameter, or a
+        or a metric listed in
+        :obj:`sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`, including
+        ``'euclidean'``, ``'manhattan'`` or ``'cosine'``. If a callable, it
+        should take pairs of vectors (1D arrays) as input and, for each two
+        vectors in a pair, it should return a scalar indicating the
+        distance/dissimilarity between them.
+
+    n_perm : int or None, optional, default: ``None``
         The number of points to subsample in a "greedy permutation", or a
         furthest point sampling of the points. These points will be used in
         lieu of the full point cloud for a faster computation, at the expense
         of some accuracy, which can be bounded as a maximum bottleneck distance
         to all diagrams on the original point set.
 
-    collapse_edges : TODO
+    collapse_edges : bool, optional, default: ``False``
+        Whether to use the edge collapse algorithm as described in [2]_ prior
+        to calling ``ripser``.
 
     Returns
     -------
@@ -174,6 +182,29 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             If n_perm <= 0, then the full point cloud was used and this is 0
     }
 
+    References
+    ----------
+    [1] U. Bauer, "Ripser: efficient computation of Vietoris–Rips persistence \
+        barcodes", 2019; `arXiv:1908.02518 \
+        <https://arxiv.org/abs/1908.02518>`_.
+
+    [2] J.-D. Boissonnat and S. Pritam, "Edge Collapse and Persistence of \
+        Flag Complexes"; in *36th International Symposium on Computational \
+        Geometry (SoCG 2020)*, pp. 19:1–19:15, Schloss
+        Dagstuhl-Leibniz–Zentrum für Informatik, 2020;
+        `DOI: 10.4230/LIPIcs.SoCG.2020.19 \
+        <https://doi.org/10.4230/LIPIcs.SoCG.2020.19>`_.
+
+    Notes
+    -----
+    `Ripser <https://github.com/Ripser/ripser>`_ is used as a C++ backend
+    for computing Vietoris–Rips persistent homology. Python bindings were
+    modified for performance from the `ripser.py
+    <https://github.com/scikit-tda/ripser.py>`_ package.
+
+    `GUDHI <https://github.com/GUDHI/gudhi-devel>`_ is used as a C++ backend
+    for the edge collapse algorithm described in [2]_.
+
     """
     if n_perm and sparse.issparse(X):
         raise Exception(

From b87897e2502835063dc084ebe7f3623adc975f12 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 7 Sep 2020 15:34:54 +0200
Subject: [PATCH 21/27] Guess number of points using max of shape tuple instead
 of first shape

Avoids having to reshape sparse arrays explicitly
---
 gtda/externals/python/ripser_interface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index 8854e9219..443d5e184 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -136,7 +136,7 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
 
     metric : string or callable, optional, default: ``'euclidean'``
         The metric to use when calculating distance between instances in a
-        feature array. If set to ``"precomputed"``, input data is interpreted
+        feature array. If set to ``'precomputed'``, input data is interpreted
         as a distance matrix or of adjacency matrices of a weighted undirected
         graph. If a string, it must be one of the options allowed by
         :func:`scipy.spatial.distance.pdist` for its metric parameter, or a
@@ -236,7 +236,7 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             dm = pairwise_distances(X, metric=metric)
         dperm2all = dm
 
-    n_points = dm.shape[0]
+    n_points = max(dm.shape)
     if not sparse.issparse(dm) and np.sum(np.abs(dm.diagonal()) > 0) > 0:
         # If any of the diagonal elements are nonzero,
         # convert to sparse format, because currently

From dc083ba6d25e306416b2ca4efa5f7eeba1d57415 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 7 Sep 2020 15:38:07 +0200
Subject: [PATCH 22/27] Simplify lexsort flag logic

---
 gtda/externals/python/ripser_interface.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index 443d5e184..35afe6118 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -269,7 +269,6 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
                 sort_coo = True
                 row, col, data = dm.row, dm.col, dm.data
             else:
-                sort_coo = False
                 coo = dm.tocoo()
                 row, col, data = coo.row, coo.col, coo.data
 

From fe30c82bee5ef228f995219fcc87edc97e6e1c94 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 7 Sep 2020 18:42:44 +0200
Subject: [PATCH 23/27] Small refactor of ripser interface to restrict collapse
 to data without non-zero vertex weights

---
 gtda/externals/python/ripser_interface.py | 29 ++++++++++++++---------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index 35afe6118..d61dd8de5 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -1,6 +1,9 @@
-from scipy import sparse
+from warnings import warn
+
 import numpy as np
+from scipy import sparse
 from sklearn.metrics.pairwise import pairwise_distances
+
 from ..modules import gtda_ripser, gtda_ripser_coeff, gtda_collapser
 
 
@@ -237,17 +240,21 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
         dperm2all = dm
 
     n_points = max(dm.shape)
-    if not sparse.issparse(dm) and np.sum(np.abs(dm.diagonal()) > 0) > 0:
-        # If any of the diagonal elements are nonzero,
-        # convert to sparse format, because currently
-        # that's the only format that handles nonzero
-        # births
-        dm = sparse.coo_matrix(dm)
+    sort_coo = True
+    if (dm.diagonal() != 0).any():
+        if collapse_edges:
+            warn("Edge collapses are not supported when any of the diagonal "
+                 "entries are non-zero. Computing persistent homology without "
+                 "using edge collapse.")
+            collapse_edges = False
+        if not sparse.issparse(dm):
+            # If any of the diagonal elements are nonzero, convert to sparse
+            # format, because currently that's the only format that handles
+            # nonzero births
+            dm = sparse.coo_matrix(dm)
+            sort_coo = False
 
     if sparse.issparse(dm) or collapse_edges:
-        # It's not necessary to apply sort in all cases
-        sort_coo = False
-
         if collapse_edges:
             sort_coo = True
             if not sparse.issparse(dm):
@@ -266,11 +273,11 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
                 # If the matrix is already COO, we need to order the row and
                 # column indices lexicographically to avoid errors. See
                 # https://github.com/scikit-tda/ripser.py/issues/103
-                sort_coo = True
                 row, col, data = dm.row, dm.col, dm.data
             else:
                 coo = dm.tocoo()
                 row, col, data = coo.row, coo.col, coo.data
+                sort_coo = False
 
         if sort_coo:
             row, col, data = _lexsort_coo_data(np.asarray(row),

From 701f6f9b4ab6a1e6bb5545f416f52583458b81ac Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 7 Sep 2020 18:44:18 +0200
Subject: [PATCH 24/27] Add hypothesis tests for collapser

Check for barcode equalities
---
 gtda/externals/python/tests/test_collapser.py | 90 ++++++++++++++++---
 1 file changed, 79 insertions(+), 11 deletions(-)

diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py
index 8b56d63a0..7d5ac8d6c 100644
--- a/gtda/externals/python/tests/test_collapser.py
+++ b/gtda/externals/python/tests/test_collapser.py
@@ -4,15 +4,19 @@
 https://github.com/GUDHI/gudhi-devel/blob/master/src/Collapse/example/edge_collapse_basic_example.cpp
 """
 
-
 import numpy as np
-import scipy
-from scipy.sparse import coo_matrix, csr_matrix
+import pytest
 from gtda.externals.modules.gtda_collapser import \
     flag_complex_collapse_edges_dense, \
     flag_complex_collapse_edges_sparse, \
     flag_complex_collapse_edges_coo
+from hypothesis import given
+from hypothesis.extra.numpy import arrays
+from hypothesis.strategies import floats, integers, composite
+from numpy.testing import assert_almost_equal
+from scipy.sparse import coo_matrix, csr_matrix
 
+from gtda.externals import ripser
 
 X = np.array([[0, 1, 1.],
               [1, 2, 1.],
@@ -28,9 +32,7 @@ def check_collapse(collapsed, removed):
     cooT = np.array([coo.row, coo.col, coo.data]).transpose()
     for elem in removed:
         if (cooT == elem).all(axis=1).any():
-            print(elem)
             return False
-
     return True
 
 
@@ -38,21 +40,87 @@ def test_simple_csr_example():
     X = csr_matrix((tX[2], (tX[0], tX[1])))
     coo_ = flag_complex_collapse_edges_sparse(X)
     coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
-    assert check_collapse(coo,
-                          [[1, 3, 2]])
+    assert check_collapse(coo, [[1, 3, 2]])
 
 
 def test_simple_coo_example():
     coo_ = flag_complex_collapse_edges_coo(
         tX[0], tX[1], tX[2])
     coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
-    assert check_collapse(coo,
-                          [[1, 3, 2]])
+    assert check_collapse(coo, [[1, 3, 2]])
 
 
 def test_simple_dense_example():
     data = csr_matrix((tX[2], (tX[0], tX[1]))).toarray()
     coo_ = flag_complex_collapse_edges_dense(data)
     coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
-    assert check_collapse(coo,
-                          [[1, 3, 2]])
+    assert check_collapse(coo, [[1, 3, 2]])
+
+
+@composite
+def get_dense_distance_matrices(draw):
+    """Generate 2d dense square arrays of floats, with zero along the
+    diagonal."""
+    shapes = draw(integers(min_value=2, max_value=30))
+    distance_matrix = draw(arrays(dtype=np.float,
+                                  elements=floats(allow_nan=False,
+                                                  allow_infinity=True,
+                                                  min_value=0),
+                                  shape=(shapes, shapes), unique=False))
+    np.fill_diagonal(distance_matrix, 0)
+    return distance_matrix
+
+
+@composite
+def get_sparse_distance_matrices(draw):
+    """Generate 2d sparse matrices of floats, with zero along the diagonal."""
+    shapes = draw(integers(min_value=2, max_value=30))
+    distance_matrix = draw(arrays(dtype=np.float,
+                                  elements=floats(allow_nan=False,
+                                                  allow_infinity=True,
+                                                  min_value=0),
+                                  shape=(shapes, shapes), unique=False))
+    distance_matrix = np.triu(distance_matrix, k=1)
+    distance_matrix = coo_matrix(distance_matrix)
+    row, col, data = \
+        distance_matrix.row, distance_matrix.col, distance_matrix.data
+    not_inf_idx = data != np.inf
+    row = row[not_inf_idx]
+    col = col[not_inf_idx]
+    data = data[not_inf_idx]
+    shape = (np.max(row) + 1, np.max(col) + 1) if not_inf_idx.sum() else (0, 0)
+    distance_matrix = coo_matrix((data, (row, col)), shape=shape)
+    return distance_matrix
+
+
+@pytest.mark.parametrize('thresh', [False, True])
+@given(distance_matrix=get_dense_distance_matrices())
+def test_collapse_consistent_with_no_collapse_dense(thresh, distance_matrix):
+    thresh = np.max(distance_matrix) / 2 if thresh else np.inf
+    maxdim = 3
+    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                         metric='precomputed', collapse_edges=True)['dgms']
+    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                            metric='precomputed', collapse_edges=False)['dgms']
+    for i in range(maxdim + 1):
+        pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
+        pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
+        assert_almost_equal(pd_collapse[i], pd_no_collapse[i])
+
+
+@pytest.mark.parametrize('thresh', [False, True])
+@given(distance_matrix=get_sparse_distance_matrices())
+def test_collapse_consistent_with_no_collapse_sparse(thresh, distance_matrix):
+    if thresh and distance_matrix.nnz:
+        thresh = np.max(distance_matrix) / 2
+    else:
+        thresh = np.inf
+    maxdim = 3
+    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                         metric='precomputed', collapse_edges=True)['dgms']
+    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                            metric='precomputed', collapse_edges=False)['dgms']
+    for i in range(maxdim + 1):
+        pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
+        pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
+        assert_almost_equal(pd_collapse[i], pd_no_collapse[i])

From 0565a382eed1ea56e0ba99849a5b3b18646ba733 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Tue, 8 Sep 2020 15:37:40 +0200
Subject: [PATCH 25/27] Place hypothesis tests in test_ripser and remove
 superseded tests there. Add regression test for #465

---
 gtda/externals/python/tests/test_collapser.py |  76 ---------
 gtda/externals/python/tests/test_ripser.py    | 152 ++++++++++--------
 2 files changed, 85 insertions(+), 143 deletions(-)

diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py
index 7d5ac8d6c..19b64340c 100644
--- a/gtda/externals/python/tests/test_collapser.py
+++ b/gtda/externals/python/tests/test_collapser.py
@@ -5,19 +5,12 @@
 """
 
 import numpy as np
-import pytest
 from gtda.externals.modules.gtda_collapser import \
     flag_complex_collapse_edges_dense, \
     flag_complex_collapse_edges_sparse, \
     flag_complex_collapse_edges_coo
-from hypothesis import given
-from hypothesis.extra.numpy import arrays
-from hypothesis.strategies import floats, integers, composite
-from numpy.testing import assert_almost_equal
 from scipy.sparse import coo_matrix, csr_matrix
 
-from gtda.externals import ripser
-
 X = np.array([[0, 1, 1.],
               [1, 2, 1.],
               [2, 3, 1.],
@@ -55,72 +48,3 @@ def test_simple_dense_example():
     coo_ = flag_complex_collapse_edges_dense(data)
     coo = coo_matrix((coo_[2], (coo_[0], coo_[1])))
     assert check_collapse(coo, [[1, 3, 2]])
-
-
-@composite
-def get_dense_distance_matrices(draw):
-    """Generate 2d dense square arrays of floats, with zero along the
-    diagonal."""
-    shapes = draw(integers(min_value=2, max_value=30))
-    distance_matrix = draw(arrays(dtype=np.float,
-                                  elements=floats(allow_nan=False,
-                                                  allow_infinity=True,
-                                                  min_value=0),
-                                  shape=(shapes, shapes), unique=False))
-    np.fill_diagonal(distance_matrix, 0)
-    return distance_matrix
-
-
-@composite
-def get_sparse_distance_matrices(draw):
-    """Generate 2d sparse matrices of floats, with zero along the diagonal."""
-    shapes = draw(integers(min_value=2, max_value=30))
-    distance_matrix = draw(arrays(dtype=np.float,
-                                  elements=floats(allow_nan=False,
-                                                  allow_infinity=True,
-                                                  min_value=0),
-                                  shape=(shapes, shapes), unique=False))
-    distance_matrix = np.triu(distance_matrix, k=1)
-    distance_matrix = coo_matrix(distance_matrix)
-    row, col, data = \
-        distance_matrix.row, distance_matrix.col, distance_matrix.data
-    not_inf_idx = data != np.inf
-    row = row[not_inf_idx]
-    col = col[not_inf_idx]
-    data = data[not_inf_idx]
-    shape = (np.max(row) + 1, np.max(col) + 1) if not_inf_idx.sum() else (0, 0)
-    distance_matrix = coo_matrix((data, (row, col)), shape=shape)
-    return distance_matrix
-
-
-@pytest.mark.parametrize('thresh', [False, True])
-@given(distance_matrix=get_dense_distance_matrices())
-def test_collapse_consistent_with_no_collapse_dense(thresh, distance_matrix):
-    thresh = np.max(distance_matrix) / 2 if thresh else np.inf
-    maxdim = 3
-    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                         metric='precomputed', collapse_edges=True)['dgms']
-    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                            metric='precomputed', collapse_edges=False)['dgms']
-    for i in range(maxdim + 1):
-        pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
-        pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
-        assert_almost_equal(pd_collapse[i], pd_no_collapse[i])
-
-
-@pytest.mark.parametrize('thresh', [False, True])
-@given(distance_matrix=get_sparse_distance_matrices())
-def test_collapse_consistent_with_no_collapse_sparse(thresh, distance_matrix):
-    if thresh and distance_matrix.nnz:
-        thresh = np.max(distance_matrix) / 2
-    else:
-        thresh = np.inf
-    maxdim = 3
-    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                         metric='precomputed', collapse_edges=True)['dgms']
-    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                            metric='precomputed', collapse_edges=False)['dgms']
-    for i in range(maxdim + 1):
-        pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
-        pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
-        assert_almost_equal(pd_collapse[i], pd_no_collapse[i])
diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py
index 484bd5e29..d47708610 100644
--- a/gtda/externals/python/tests/test_ripser.py
+++ b/gtda/externals/python/tests/test_ripser.py
@@ -1,79 +1,97 @@
 import numpy as np
-import scipy as sp
-from .. import ripser
+import pytest
+from hypothesis import given
+from hypothesis.extra.numpy import arrays
+from hypothesis.strategies import floats, integers, composite
+from numpy.testing import assert_almost_equal
+from scipy.sparse import coo_matrix
 
-X = np.random.random((50, 50))
-# Without this line, there might be different results because GUDHI
-# assumes zeros in the diagonal
-np.fill_diagonal(X, 0)
-maxdim = 2
+from gtda.externals import ripser
 
 
-def test_with_collapser():
-    diags_collapsed = ripser(
-        X,
-        metric='precomputed',
-        maxdim=maxdim,
-        collapse_edges=True)['dgms']
-    diags_not_collapsed = ripser(
-        X,
-        metric='precomputed',
-        maxdim=maxdim,
-        collapse_edges=False)['dgms']
+@composite
+def get_dense_distance_matrices(draw):
+    """Generate 2d dense square arrays of floats, with zero along the
+    diagonal."""
+    shapes = draw(integers(min_value=2, max_value=30))
+    distance_matrix = draw(arrays(dtype=np.float,
+                                  elements=floats(allow_nan=False,
+                                                  allow_infinity=True,
+                                                  min_value=0),
+                                  shape=(shapes, shapes), unique=False))
+    np.fill_diagonal(distance_matrix, 0)
+    return distance_matrix
 
-    for i in range(maxdim):
-        assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i])
 
+@composite
+def get_sparse_distance_matrices(draw):
+    """Generate 2d sparse matrices of floats, with zero along the diagonal."""
+    shapes = draw(integers(min_value=2, max_value=40))
+    distance_matrix = draw(arrays(dtype=np.float,
+                                  elements=floats(allow_nan=False,
+                                                  allow_infinity=True,
+                                                  min_value=0),
+                                  shape=(shapes, shapes), unique=False))
+    distance_matrix = np.triu(distance_matrix, k=1)
+    distance_matrix = coo_matrix(distance_matrix)
+    row, col, data = \
+        distance_matrix.row, distance_matrix.col, distance_matrix.data
+    not_inf_idx = data != np.inf
+    row = row[not_inf_idx]
+    col = col[not_inf_idx]
+    data = data[not_inf_idx]
+    shape = (np.max(row) + 1, np.max(col) + 1) if not_inf_idx.any() else (0, 0)
+    distance_matrix = coo_matrix((data, (row, col)), shape=shape)
+    return distance_matrix
 
-def test_with_collapser_with_tresh():
-    thresh = 0.1
-    diags_collapsed_thresh = ripser(
-        X,
-        metric='precomputed',
-        maxdim=maxdim,
-        thresh=thresh,
-        collapse_edges=True)['dgms']
-    diags_not_collapsed_thresh = ripser(
-        X, metric='precomputed', maxdim=maxdim, thresh=thresh,
-        collapse_edges=False)['dgms']
 
-    for i in range(maxdim):
-        assert np.array_equal(diags_collapsed_thresh[i],
-                              diags_not_collapsed_thresh[i])
+@pytest.mark.parametrize('thresh', [False, True])
+@given(distance_matrix=get_dense_distance_matrices())
+def test_collapse_consistent_with_no_collapse_dense(thresh, distance_matrix):
+    thresh = np.max(distance_matrix) / 2 if thresh else np.inf
+    maxdim = 3
+    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                         metric='precomputed', collapse_edges=True)['dgms']
+    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                            metric='precomputed', collapse_edges=False)['dgms']
+    for i in range(maxdim + 1):
+        pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
+        pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
+        assert_almost_equal(pd_collapse[i], pd_no_collapse[i])
 
 
-def test_with_collapser_coo():
-    X_tri = np.triu(X, 1)
-    diags_collapsed = ripser(
-        sp.sparse.coo_matrix(X_tri),
-        metric='precomputed',
-        maxdim=maxdim,
-        collapse_edges=True)['dgms']
-    diags_not_collapsed = ripser(
-        sp.sparse.coo_matrix(X_tri),
-        metric='precomputed',
-        maxdim=maxdim,
-        collapse_edges=False)['dgms']
+@pytest.mark.parametrize('thresh', [False, True])
+@given(distance_matrix=get_sparse_distance_matrices())
+def test_collapse_consistent_with_no_collapse_coo(thresh, distance_matrix):
+    if thresh and distance_matrix.nnz:
+        thresh = np.max(distance_matrix) / 2
+    else:
+        thresh = np.inf
+    maxdim = 3
+    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                         metric='precomputed', collapse_edges=True)['dgms']
+    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
+                            metric='precomputed', collapse_edges=False)['dgms']
+    for i in range(maxdim + 1):
+        pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
+        pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
+        assert_almost_equal(pd_collapse[i], pd_no_collapse[i])
 
-    for i in range(maxdim):
-        assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i])
 
-
-def test_with_collapser_coo_thresh():
-    thresh = 0.1
-    X_tri = np.triu(X, 1)
-    diags_collapsed = ripser(
-        sp.sparse.coo_matrix(X_tri),
-        metric='precomputed',
-        maxdim=maxdim,
-        thresh=thresh,
-        collapse_edges=True)['dgms']
-    diags_not_collapsed = ripser(
-        sp.sparse.coo_matrix(X_tri),
-        metric='precomputed',
-        maxdim=maxdim,
-        thresh=thresh,
-        collapse_edges=False)['dgms']
-
-    for i in range(maxdim):
-        assert np.array_equal(diags_collapsed[i], diags_not_collapsed[i])
+def test_coo_results_independent_of_order():
+    """Regression test for PR #465"""
+    data = np.array([6., 8., 2., 4., 5., 9., 10., 3., 1., 1.])
+    row = np.array([0, 0, 0, 0, 1, 1, 1, 2, 2, 3])
+    col = np.array([4, 1, 3, 2, 4, 3, 2, 3, 4, 4])
+    dm = coo_matrix((data, (row, col)))
+    diagrams = ripser(dm, metric="precomputed")['dgms']
+    diagrams_csr = ripser(dm.tocsr(), metric="precomputed")['dgms']
+    expected = [np.array([[0., 1.],
+                          [0., 1.],
+                          [0., 2.],
+                          [0., 5.],
+                          [0., np.inf]]),
+                np.array([], shape=(0, 2), dtype=np.float64)]
+    for i in range(2):
+        assert np.array_equal(diagrams[i], expected[i])
+        assert np.array_equal(diagrams_csr[i], expected[i])

From 4f91a1b20332adfdb9af1d97d29fbddb5292bf29 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Tue, 8 Sep 2020 15:56:02 +0200
Subject: [PATCH 26/27] Fix array shape

---
 gtda/externals/python/tests/test_ripser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py
index d47708610..1b6bb4955 100644
--- a/gtda/externals/python/tests/test_ripser.py
+++ b/gtda/externals/python/tests/test_ripser.py
@@ -91,7 +91,7 @@ def test_coo_results_independent_of_order():
                           [0., 2.],
                           [0., 5.],
                           [0., np.inf]]),
-                np.array([], shape=(0, 2), dtype=np.float64)]
+                np.array([], dtype=np.float64).reshape(0, 2)]
     for i in range(2):
         assert np.array_equal(diagrams[i], expected[i])
         assert np.array_equal(diagrams_csr[i], expected[i])

From 2e9e574411e4e20262f0359b61551205da4d0e29 Mon Sep 17 00:00:00 2001
From: julian <julian.burellaperez@heig-vd.ch>
Date: Tue, 8 Sep 2020 16:03:34 +0200
Subject: [PATCH 27/27] Disable computation of cocycles by default

Signed-off-by: julian <julian.burellaperez@heig-vd.ch>
---
 gtda/externals/python/ripser_interface.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
index d61dd8de5..077b277cb 100644
--- a/gtda/externals/python/ripser_interface.py
+++ b/gtda/externals/python/ripser_interface.py
@@ -14,7 +14,7 @@ def _lexsort_coo_data(row, col, data):
     return row, col, data
 
 
-def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=1):
+def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=0):
     if coeff == 2:
         ret = gtda_ripser.rips_dm(DParam, DParam.shape[0], coeff, maxHomDim,
                                   thresh, do_cocycles)
@@ -27,7 +27,7 @@ def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=1):
     return ret_rips
 
 
-def DRFDMSparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, do_cocycles=1):
+def DRFDMSparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, do_cocycles=0):
     if coeff == 2:
         ret = gtda_ripser.rips_dm_sparse(I, J, V, I.size, N, coeff, maxHomDim,
                                          thresh, do_cocycles)