Commit 2ad31a7

Merge pull request #72 from jni/fix-path-ordering

Fixes #71

The implementations of summarise() and the Skeleton class have diverged significantly: the way paths are traversed is totally different in each. Rather than reconciling them, I decided to move towards "soft" deprecation of summarise, preferring the Skeleton class as the API for the future.

As a result of #71, I've implemented csr.summarize, a function taking a Skeleton object, which replicates (and improves upon) the functionality of summarise.

This PR also fixes a number of deprecations in dependencies like NumPy, SciPy, and Matplotlib.
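
For illustration, a minimal usage sketch of the new API (the toy image here is hypothetical; it mirrors the new test added in this diff):

    import numpy as np
    from skan.csr import Skeleton, summarize

    image = np.zeros((20, 20), dtype=float)
    image[10, 5:15] = 1.0  # a single straight branch of nonzero pixels
    summary = summarize(Skeleton(image))
    # one row per branch; 'branch-type' is 0 (tip-tip), 1 (tip-junction),
    # 2 (junction-junction), or 3 (cycle)
    print(summary[['skeleton-id', 'branch-distance', 'branch-type']])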
jni committed May 27, 2019
2 parents 5c0d23e + 9dc4b01 commit 2ad31a7
Showing 9 changed files with 106 additions and 164 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
@@ -42,7 +42,7 @@ script:
   - set -e
   - export MPLBACKEND="ps"
   - python -c 'import matplotlib.pyplot as plt; print(plt.get_backend())'
-  - py.test
+  - pytest --ignore skan/gui.py
   - pip install doctr
   - cd doc
   - if [[ ! -f schizonts/schizont4_UninfRBC1_01.tif ]]; then
@@ -62,7 +62,7 @@ script:
     fi

 after_success:
-  - if [[ $TRAVIS_PYTHON_VERSION == 3.5 ]]; then make numba-clean; export NUMBA_DISABLE_JIT=1; py.test --cov-report term-missing --cov .; coveralls; fi
+  - if [[ $TRAVIS_PYTHON_VERSION == 3.5 ]]; then make numba-clean; export NUMBA_DISABLE_JIT=1; py.test --cov-report term-missing --cov . --ignore skan/gui.py; coveralls; fi

 env:
   global:
6 changes: 3 additions & 3 deletions requirements.txt
@@ -2,11 +2,11 @@ numpy>=1.11
 scipy>=0.18
 networkx>=1.11
 numba>=0.29
-pandas>=0.18
+pandas>=0.24
 openpyxl>=2.4
 xlrd>=1.0
-matplotlib>=1.5
-scikit-image>=0.12
+matplotlib>=3.0
+scikit-image>=0.14
 imageio>=2.0
 tqdm>=4.0
 dask>=0.12
85 changes: 79 additions & 6 deletions skan/csr.py
@@ -176,6 +176,7 @@ def _write_pixel_graph_height(image, height, steps, distances, row, col, data):
 def _build_paths(jgraph, indptr, indices, path_data, visited, degrees):
     indptr_i = 0
     indices_j = 0
+    # first, process all nodes in a path to an endpoint or junction
     for node in range(1, jgraph.shape[0]):
         if degrees[node] > 2 or degrees[node] == 1 and not visited[node]:
             for neighbor in jgraph.neighbors(node):
@@ -187,6 +188,17 @@ def _build_paths(jgraph, indptr, indices, path_data, visited, degrees):
                     indptr[indptr_i + 1] = indptr[indptr_i] + n_steps
                     indptr_i += 1
                     indices_j += n_steps
+    # everything else is by definition in isolated cycles
+    for node in range(1, jgraph.shape[0]):
+        if degrees[node] > 0:
+            if not visited[node]:
+                visited[node] = True
+                neighbor = jgraph.neighbors(node)[0]
+                n_steps = _walk_path(jgraph, node, neighbor, visited, degrees,
+                                     indices, path_data, indices_j)
+                indptr[indptr_i + 1] = indptr[indptr_i] + n_steps
+                indptr_i += 1
+                indices_j += n_steps
     return indptr_i + 1, indices_j


@@ -210,7 +222,10 @@ def _walk_path(jgraph, node, neighbor, visited, degrees, indices, path_data,
     return j - startj + 1


-def _build_skeleton_path_graph(graph, *, _buffer_size_offset=0):
+def _build_skeleton_path_graph(graph, *, _buffer_size_offset=None):
+    if _buffer_size_offset is None:
+        max_num_cycles = graph.indices.size // 4
+        _buffer_size_offset = max_num_cycles
     degrees = np.diff(graph.indptr)
     visited = np.zeros(degrees.shape, dtype=bool)
     endpoints = (degrees != 2)
@@ -220,14 +235,18 @@ def _build_skeleton_path_graph(graph, *, _buffer_size_offset=0):
     # the number of points that we need to save to store all skeleton
     # paths is equal to the number of pixels plus the sum of endpoint
     # degrees minus one (since the endpoints will have been counted once
-    # already in the number of pixels).
-    path_indices = np.zeros(graph.indices.size
-                            + np.sum(endpoint_degrees - 1), dtype=int)
+    # already in the number of pixels) *plus* the number of isolated
+    # cycles (since each cycle has one index repeated). We don't know
+    # the number of cycles ahead of time, but it is bounded by one quarter
+    # of the number of points.
+    n_points = (graph.indices.size + np.sum(endpoint_degrees - 1) +
+                max_num_cycles)
+    path_indices = np.zeros(n_points, dtype=int)
     path_data = np.zeros(path_indices.shape, dtype=float)
     m, n = _build_paths(graph, path_indptr, path_indices, path_data,
                         visited, degrees)
     paths = sparse.csr_matrix((path_data[:n], path_indices[:n],
-                               path_indptr[:m]))
+                               path_indptr[:m]), shape=(m-1, n))
     return paths


@@ -292,7 +311,7 @@ class Skeleton:
     `keep_images` is True. This is useful for visualization.
     """
     def __init__(self, skeleton_image, *, spacing=1, source_image=None,
-                 _buffer_size_offset=0, keep_images=True):
+                 _buffer_size_offset=None, keep_images=True):
         graph, coords, degrees = skeleton_to_csgraph(skeleton_image,
                                                      spacing=spacing)
         if np.issubdtype(skeleton_image.dtype, np.float_):
@@ -310,6 +329,10 @@ def __init__(self, skeleton_image, *, spacing=1, source_image=None,
         self._distances_initialized = False
         self.skeleton_image = None
         self.source_image = None
+        self.degrees_image = degrees
+        self.degrees = np.diff(self.graph.indptr)
+        self.spacing = (np.asarray(spacing) if not np.isscalar(spacing)
+                        else np.full(skeleton_image.ndim, spacing))
         if keep_images:
             self.skeleton_image = skeleton_image
             self.source_image = source_image
@@ -425,6 +448,56 @@ def path_stdev(self):
         return np.sqrt(np.clip(sumsq/lengths - means*means, 0, None))


+def summarize(skel: Skeleton):
+    """Compute statistics for every skeleton and branch in ``skel``.
+
+    Parameters
+    ----------
+    skel : skan.csr.Skeleton
+        A Skeleton object.
+
+    Returns
+    -------
+    summary : pandas.DataFrame
+        A summary of the branches including branch length, mean branch value,
+        branch euclidean distance, etc.
+    """
+    summary = {}
+    ndim = skel.coordinates.shape[1]
+    _, skeleton_ids = csgraph.connected_components(skel.graph,
+                                                   directed=False)
+    endpoints_src = skel.paths.indices[skel.paths.indptr[:-1]]
+    endpoints_dst = skel.paths.indices[skel.paths.indptr[1:] - 1]
+    summary['skeleton-id'] = skeleton_ids[endpoints_src]
+    summary['node-id-src'] = endpoints_src
+    summary['node-id-dst'] = endpoints_dst
+    summary['branch-distance'] = skel.path_lengths()
+    deg_src = skel.degrees[endpoints_src]
+    deg_dst = skel.degrees[endpoints_dst]
+    kind = np.full(deg_src.shape, 2)  # default: junction-to-junction
+    kind[(deg_src == 1) | (deg_dst == 1)] = 1  # tip-junction
+    kind[(deg_src == 1) & (deg_dst == 1)] = 0  # tip-tip
+    kind[endpoints_src == endpoints_dst] = 3  # cycle
+    summary['branch-type'] = kind
+    summary['mean-pixel-value'] = skel.path_means()
+    summary['stdev-pixel-value'] = skel.path_stdev()
+    for i in range(ndim):  # keep loops separate for best insertion order
+        summary[f'image-coord-src-{i}'] = skel.coordinates[endpoints_src, i]
+    for i in range(ndim):
+        summary[f'image-coord-dst-{i}'] = skel.coordinates[endpoints_dst, i]
+    coords_real_src = skel.coordinates[endpoints_src] * skel.spacing
+    for i in range(ndim):
+        summary[f'coord-src-{i}'] = coords_real_src[:, i]
+    coords_real_dst = skel.coordinates[endpoints_dst] * skel.spacing
+    for i in range(ndim):
+        summary[f'coord-dst-{i}'] = coords_real_dst[:, i]
+    summary['euclidean-distance'] = (
+        np.sqrt((coords_real_dst - coords_real_src)**2 @ np.ones(ndim))
+    )
+    df = pd.DataFrame(summary)
+    return df
+
+
 @numba.jit(nopython=True, nogil=True, cache=False)  # cache with Numba 1.0
 def _compute_distances(graph, path_indptr, path_indices, distances):
     for i in range(len(distances)):
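
Before this change, pixels on an isolated cycle were never emitted as a path: the traversal in _build_paths only ever started from an endpoint or a junction, and a cycle contains neither. The second pass added above starts a walk from an arbitrary unvisited node of nonzero degree instead. A quick check of the new behaviour through the public API, mirroring the new test_tiny_cycle below (the array is a sketch of tinycycle from skan._testdata):

    import numpy as np
    from skan.csr import Skeleton

    # the smallest 8-connected cycle: a diamond of four pixels
    tinycycle = np.array([[0, 1, 0],
                          [1, 0, 1],
                          [0, 1, 0]], dtype=bool)

    # the cycle becomes a single path whose start pixel is repeated at the
    # end, so four pixels yield five path indices
    print(Skeleton(tinycycle).paths.shape)  # (1, 5)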
3 changes: 1 addition & 2 deletions skan/pipe.py
@@ -6,8 +6,7 @@
 from skimage import morphology
 import pandas as pd
 from .image_stats import image_summary
-from .vendored.shape import shape_index
-import matplotlib.pyplot as plt
+from skimage.feature import shape_index
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import multiprocessing as mp
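
The vendored shape-index implementation (deleted below) is replaced by the upstream one in skimage.feature, which is why requirements.txt now asks for scikit-image>=0.14. A minimal sketch of the replacement call, with a hypothetical input image:

    import numpy as np
    from skimage.feature import shape_index

    image = np.random.random((64, 64))  # hypothetical 2D image
    s = shape_index(image, sigma=1)  # per-pixel shape index in [-1, 1]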
4 changes: 2 additions & 2 deletions skan/test/test_io.py
@@ -22,9 +22,9 @@ def test_write_excel_tables():
              'threshold radius': 5e-8}
     with temporary_file(suffix='.xlsx') as file:
         io.write_excel(file, **kwargs)
-        tables_in = [pd.read_excel(file, sheetname=name)
+        tables_in = [pd.read_excel(file, sheet_name=name, index_col=0)
                      for name in sheet_names]
-        config_in_df = pd.read_excel(file, sheetname='config')
+        config_in_df = pd.read_excel(file, sheet_name='config')
         config_in = dict(zip(config_in_df['parameters'],
                              config_in_df['values']))
     for table, table_in in zip(tables, tables_in):
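
These test updates track the pandas API: the sheetname keyword of read_excel was renamed to sheet_name in newer pandas (hence the pandas>=0.24 bump above), and index_col=0 keeps the saved index from coming back as an extra unnamed column. A small sketch with a hypothetical file and sheet name:

    import pandas as pd

    # old spelling, rejected by current pandas: sheetname='stats'
    table = pd.read_excel('out.xlsx', sheet_name='stats', index_col=0)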
19 changes: 17 additions & 2 deletions skan/test/test_skeleton_class.py
@@ -3,16 +3,21 @@
 from time import process_time
 import numpy as np
 from numpy.testing import assert_equal, assert_allclose
-from skan.csr import Skeleton
+from skan.csr import Skeleton, summarize

 from skan._testdata import (tinycycle, tinyline, skeleton0, skeleton1,
                             skeleton2, skeleton3d, topograph1d, skeleton4,
                             junction_first)


+def test_tiny_cycle():
+    skeleton = Skeleton(tinycycle)
+    assert skeleton.paths.shape == (1, 5)
+
+
 def test_skeleton1_topo():
     skeleton = Skeleton(skeleton1)
-    assert skeleton.paths.shape == (4, np.sum(skeleton1) + 1)
+    assert skeleton.paths.shape == (4, 21)
     paths_list = skeleton.paths_list()
     reference_paths = [
         [8, 6, 1, 2, 3, 4, 5, 7, 11, 10, 13],
@@ -87,3 +92,13 @@ def test_junction_first():
     but not impossible in 2D.
     """
     assert [1, 1] not in Skeleton(junction_first).paths_list()
+
+
+def test_skeleton_summarize():
+    image = np.zeros(skeleton2.shape, dtype=float)
+    image[skeleton2] = 1 + np.random.random(np.sum(skeleton2))
+    skeleton = Skeleton(image)
+    summary = summarize(skeleton)
+    assert set(summary['skeleton-id']) == {1, 2}
+    assert (np.all(summary['mean-pixel-value'] < 2)
+            and np.all(summary['mean-pixel-value'] > 1))
144 changes: 0 additions & 144 deletions skan/vendored/shape.py

This file was deleted.

4 changes: 1 addition & 3 deletions skan/vendored/thresholding.py
@@ -1,8 +1,6 @@
 import itertools
 import numpy as np
 from scipy import ndimage as ndi
-from skimage.transform import integral_image
-from skimage import util
 from skimage.util import dtype_limits
 import numba

@@ -12,7 +10,7 @@ def broadcast_mgrid(arrays):
     ndim = len(shape)
     result = []
     for i, arr in enumerate(arrays, start=1):
-        reshaped = np.broadcast_to(arr[[...] + [np.newaxis] * (ndim - i)],
+        reshaped = np.broadcast_to(arr[(...,) + (np.newaxis,) * (ndim - i)],
                                    shape)
         result.append(reshaped)
     return result
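
The indexing change fixes a NumPy deprecation: since NumPy 1.15, multidimensional indexing with anything other than a tuple (here, the list [...] + [np.newaxis] * (ndim - i)) emits a FutureWarning; the tuple form is the supported spelling. A small standalone sketch:

    import numpy as np

    arr = np.arange(4)
    # arr[[..., np.newaxis]]  # list index: deprecated since NumPy 1.15
    reshaped = arr[(...,) + (np.newaxis,)]  # tuple index: supported
    print(reshaped.shape)  # (4, 1)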
1 change: 1 addition & 0 deletions test-requirements.txt
@@ -2,3 +2,4 @@ pytest>=3.0
 pytest-cov>=2.3
 coverage>=4.2
 hypothesis>=3.6
+pytest-faulthandler
