Commit 2ad31a7

Merge pull request #72 from jni/fix-path-ordering

Fixes #71

The implementations of summarise() and the Skeleton class have diverged significantly: the way paths are traversed is totally different in each. Rather than reconciling them, I decided to move towards "soft" deprecation of summarise, preferring the Skeleton class as the API for the future.

As a result of #71, I've implemented csr.summarize, a function taking a Skeleton object, which replicates (and improves upon) the functionality of summarise.

This PR also fixes a number of deprecations in dependencies like NumPy, SciPy, and Matplotlib.
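
For illustration, a minimal usage sketch of the new API (the toy image here is hypothetical; it mirrors the new test added in this diff):

    import numpy as np
    from skan.csr import Skeleton, summarize

    image = np.zeros((20, 20), dtype=float)
    image[10, 5:15] = 1.0  # a single straight branch of nonzero pixels
    summary = summarize(Skeleton(image))
    # one row per branch; 'branch-type' is 0 (tip-tip), 1 (tip-junction),
    # 2 (junction-junction), or 3 (cycle)
    print(summary[['skeleton-id', 'branch-distance', 'branch-type']])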
jni committed May 27, 2019
2 parents 5c0d23e + 9dc4b01 commit 2ad31a7
Showing 9 changed files with 106 additions and 164 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
@@ -42,7 +42,7 @@ script:
   - set -e
   - export MPLBACKEND="ps"
   - python -c 'import matplotlib.pyplot as plt; print(plt.get_backend())'
-  - py.test
+  - pytest --ignore skan/gui.py
   - pip install doctr
   - cd doc
   - if [[ ! -f schizonts/schizont4_UninfRBC1_01.tif ]]; then
@@ -62,7 +62,7 @@ script:
     fi

 after_success:
-  - if [[ $TRAVIS_PYTHON_VERSION == 3.5 ]]; then make numba-clean; export NUMBA_DISABLE_JIT=1; py.test --cov-report term-missing --cov .; coveralls; fi
+  - if [[ $TRAVIS_PYTHON_VERSION == 3.5 ]]; then make numba-clean; export NUMBA_DISABLE_JIT=1; py.test --cov-report term-missing --cov . --ignore skan/gui.py; coveralls; fi

 env:
   global:
6 changes: 3 additions & 3 deletions requirements.txt
@@ -2,11 +2,11 @@ numpy>=1.11
 scipy>=0.18
 networkx>=1.11
 numba>=0.29
-pandas>=0.18
+pandas>=0.24
 openpyxl>=2.4
 xlrd>=1.0
-matplotlib>=1.5
-scikit-image>=0.12
+matplotlib>=3.0
+scikit-image>=0.14
 imageio>=2.0
 tqdm>=4.0
 dask>=0.12
85 changes: 79 additions & 6 deletions skan/csr.py
@@ -176,6 +176,7 @@ def _write_pixel_graph_height(image, height, steps, distances, row, col, data):
 def _build_paths(jgraph, indptr, indices, path_data, visited, degrees):
     indptr_i = 0
     indices_j = 0
+    # first, process all nodes in a path to an endpoint or junction
     for node in range(1, jgraph.shape[0]):
         if degrees[node] > 2 or degrees[node] == 1 and not visited[node]:
             for neighbor in jgraph.neighbors(node):
@@ -187,6 +188,17 @@ def _build_paths(jgraph, indptr, indices, path_data, visited, degrees):
                     indptr[indptr_i + 1] = indptr[indptr_i] + n_steps
                     indptr_i += 1
                     indices_j += n_steps
+    # everything else is by definition in isolated cycles
+    for node in range(1, jgraph.shape[0]):
+        if degrees[node] > 0:
+            if not visited[node]:
+                visited[node] = True
+                neighbor = jgraph.neighbors(node)[0]
+                n_steps = _walk_path(jgraph, node, neighbor, visited, degrees,
+                                     indices, path_data, indices_j)
+                indptr[indptr_i + 1] = indptr[indptr_i] + n_steps
+                indptr_i += 1
+                indices_j += n_steps
     return indptr_i + 1, indices_j


@@ -210,7 +222,10 @@ def _walk_path(jgraph, node, neighbor, visited, degrees, indices, path_data,
     return j - startj + 1


-def _build_skeleton_path_graph(graph, *, _buffer_size_offset=0):
+def _build_skeleton_path_graph(graph, *, _buffer_size_offset=None):
+    if _buffer_size_offset is None:
+        max_num_cycles = graph.indices.size // 4
+        _buffer_size_offset = max_num_cycles
     degrees = np.diff(graph.indptr)
     visited = np.zeros(degrees.shape, dtype=bool)
     endpoints = (degrees != 2)
@@ -220,14 +235,18 @@ def _build_skeleton_path_graph(graph, *, _buffer_size_offset=0):
     # the number of points that we need to save to store all skeleton
     # paths is equal to the number of pixels plus the sum of endpoint
     # degrees minus one (since the endpoints will have been counted once
-    # already in the number of pixels).
-    path_indices = np.zeros(graph.indices.size
-                            + np.sum(endpoint_degrees - 1), dtype=int)
+    # already in the number of pixels) *plus* the number of isolated
+    # cycles (since each cycle has one index repeated). We don't know
+    # the number of cycles ahead of time, but it is bounded by one quarter
+    # of the number of points.
+    n_points = (graph.indices.size + np.sum(endpoint_degrees - 1) +
+                max_num_cycles)
+    path_indices = np.zeros(n_points, dtype=int)
     path_data = np.zeros(path_indices.shape, dtype=float)
     m, n = _build_paths(graph, path_indptr, path_indices, path_data,
                         visited, degrees)
     paths = sparse.csr_matrix((path_data[:n], path_indices[:n],
-                               path_indptr[:m]))
+                               path_indptr[:m]), shape=(m-1, n))
     return paths


@@ -292,7 +311,7 @@ class Skeleton:
     `keep_images` is True. This is useful for visualization.
     """
     def __init__(self, skeleton_image, *, spacing=1, source_image=None,
-                 _buffer_size_offset=0, keep_images=True):
+                 _buffer_size_offset=None, keep_images=True):
         graph, coords, degrees = skeleton_to_csgraph(skeleton_image,
                                                      spacing=spacing)
         if np.issubdtype(skeleton_image.dtype, np.float_):
@@ -310,6 +329,10 @@ def __init__(self, skeleton_image, *, spacing=1, source_image=None,
         self._distances_initialized = False
         self.skeleton_image = None
         self.source_image = None
+        self.degrees_image = degrees
+        self.degrees = np.diff(self.graph.indptr)
+        self.spacing = (np.asarray(spacing) if not np.isscalar(spacing)
+                        else np.full(skeleton_image.ndim, spacing))
         if keep_images:
             self.skeleton_image = skeleton_image
             self.source_image = source_image
@@ -425,6 +448,56 @@ def path_stdev(self):
         return np.sqrt(np.clip(sumsq/lengths - means*means, 0, None))


+def summarize(skel: Skeleton):
+    """Compute statistics for every skeleton and branch in ``skel``.
+
+    Parameters
+    ----------
+    skel : skan.csr.Skeleton
+        A Skeleton object.
+
+    Returns
+    -------
+    summary : pandas.DataFrame
+        A summary of the branches including branch length, mean branch value,
+        branch euclidean distance, etc.
+    """
+    summary = {}
+    ndim = skel.coordinates.shape[1]
+    _, skeleton_ids = csgraph.connected_components(skel.graph,
+                                                   directed=False)
+    endpoints_src = skel.paths.indices[skel.paths.indptr[:-1]]
+    endpoints_dst = skel.paths.indices[skel.paths.indptr[1:] - 1]
+    summary['skeleton-id'] = skeleton_ids[endpoints_src]
+    summary['node-id-src'] = endpoints_src
+    summary['node-id-dst'] = endpoints_dst
+    summary['branch-distance'] = skel.path_lengths()
+    deg_src = skel.degrees[endpoints_src]
+    deg_dst = skel.degrees[endpoints_dst]
+    kind = np.full(deg_src.shape, 2)  # default: junction-to-junction
+    kind[(deg_src == 1) | (deg_dst == 1)] = 1  # tip-junction
+    kind[(deg_src == 1) & (deg_dst == 1)] = 0  # tip-tip
+    kind[endpoints_src == endpoints_dst] = 3  # cycle
+    summary['branch-type'] = kind
+    summary['mean-pixel-value'] = skel.path_means()
+    summary['stdev-pixel-value'] = skel.path_stdev()
+    for i in range(ndim):  # keep loops separate for best insertion order
+        summary[f'image-coord-src-{i}'] = skel.coordinates[endpoints_src, i]
+    for i in range(ndim):
+        summary[f'image-coord-dst-{i}'] = skel.coordinates[endpoints_dst, i]
+    coords_real_src = skel.coordinates[endpoints_src] * skel.spacing
+    for i in range(ndim):
+        summary[f'coord-src-{i}'] = coords_real_src[:, i]
+    coords_real_dst = skel.coordinates[endpoints_dst] * skel.spacing
+    for i in range(ndim):
+        summary[f'coord-dst-{i}'] = coords_real_dst[:, i]
+    summary['euclidean-distance'] = (
+        np.sqrt((coords_real_dst - coords_real_src)**2 @ np.ones(ndim))
+    )
+    df = pd.DataFrame(summary)
+    return df
+
+
 @numba.jit(nopython=True, nogil=True, cache=False)  # cache with Numba 1.0
 def _compute_distances(graph, path_indptr, path_indices, distances):
     for i in range(len(distances)):
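
Before this change, pixels on an isolated cycle were never emitted as a path: the traversal in _build_paths only ever started from an endpoint or a junction, and a cycle contains neither. The second pass added above starts a walk from an arbitrary unvisited node of nonzero degree instead. A quick check of the new behaviour through the public API, mirroring the new test_tiny_cycle below (the array is a sketch of tinycycle from skan._testdata):

    import numpy as np
    from skan.csr import Skeleton

    # the smallest 8-connected cycle: a diamond of four pixels
    tinycycle = np.array([[0, 1, 0],
                          [1, 0, 1],
                          [0, 1, 0]], dtype=bool)

    # the cycle becomes a single path whose start pixel is repeated at the
    # end, so four pixels yield five path indices
    print(Skeleton(tinycycle).paths.shape)  # (1, 5)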
3 changes: 1 addition & 2 deletions skan/pipe.py
@@ -6,8 +6,7 @@
 from skimage import morphology
 import pandas as pd
 from .image_stats import image_summary
-from .vendored.shape import shape_index
-import matplotlib.pyplot as plt
+from skimage.feature import shape_index
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import multiprocessing as mp
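
The vendored shape-index implementation (deleted below) is replaced by the upstream one in skimage.feature, which is why requirements.txt now asks for scikit-image>=0.14. A minimal sketch of the replacement call, with a hypothetical input image:

    import numpy as np
    from skimage.feature import shape_index

    image = np.random.random((64, 64))  # hypothetical 2D image
    s = shape_index(image, sigma=1)  # per-pixel shape index in [-1, 1]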
4 changes: 2 additions & 2 deletions skan/test/test_io.py
@@ -22,9 +22,9 @@ def test_write_excel_tables():
              'threshold radius': 5e-8}
     with temporary_file(suffix='.xlsx') as file:
         io.write_excel(file, **kwargs)
-        tables_in = [pd.read_excel(file, sheetname=name)
+        tables_in = [pd.read_excel(file, sheet_name=name, index_col=0)
                      for name in sheet_names]
-        config_in_df = pd.read_excel(file, sheetname='config')
+        config_in_df = pd.read_excel(file, sheet_name='config')
         config_in = dict(zip(config_in_df['parameters'],
                              config_in_df['values']))
     for table, table_in in zip(tables, tables_in):
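
These test updates track the pandas API: the sheetname keyword of read_excel was renamed to sheet_name in newer pandas (hence the pandas>=0.24 bump above), and index_col=0 keeps the saved index from coming back as an extra unnamed column. A small sketch with a hypothetical file and sheet name:

    import pandas as pd

    # old spelling, rejected by current pandas: sheetname='stats'
    table = pd.read_excel('out.xlsx', sheet_name='stats', index_col=0)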
19 changes: 17 additions & 2 deletions skan/test/test_skeleton_class.py
@@ -3,16 +3,21 @@
 from time import process_time
 import numpy as np
 from numpy.testing import assert_equal, assert_allclose
-from skan.csr import Skeleton
+from skan.csr import Skeleton, summarize

 from skan._testdata import (tinycycle, tinyline, skeleton0, skeleton1,
                             skeleton2, skeleton3d, topograph1d, skeleton4,
                             junction_first)


+def test_tiny_cycle():
+    skeleton = Skeleton(tinycycle)
+    assert skeleton.paths.shape == (1, 5)
+
+
 def test_skeleton1_topo():
     skeleton = Skeleton(skeleton1)
-    assert skeleton.paths.shape == (4, np.sum(skeleton1) + 1)
+    assert skeleton.paths.shape == (4, 21)
     paths_list = skeleton.paths_list()
     reference_paths = [
         [8, 6, 1, 2, 3, 4, 5, 7, 11, 10, 13],
@@ -87,3 +92,13 @@ def test_junction_first():
     but not impossible in 2D.
     """
     assert [1, 1] not in Skeleton(junction_first).paths_list()
+
+
+def test_skeleton_summarize():
+    image = np.zeros(skeleton2.shape, dtype=float)
+    image[skeleton2] = 1 + np.random.random(np.sum(skeleton2))
+    skeleton = Skeleton(image)
+    summary = summarize(skeleton)
+    assert set(summary['skeleton-id']) == {1, 2}
+    assert (np.all(summary['mean-pixel-value'] < 2)
+            and np.all(summary['mean-pixel-value'] > 1))
144 changes: 0 additions & 144 deletions skan/vendored/shape.py

This file was deleted.

4 changes: 1 addition & 3 deletions skan/vendored/thresholding.py
@@ -1,8 +1,6 @@
 import itertools
 import numpy as np
 from scipy import ndimage as ndi
-from skimage.transform import integral_image
-from skimage import util
 from skimage.util import dtype_limits
 import numba

@@ -12,7 +10,7 @@ def broadcast_mgrid(arrays):
     ndim = len(shape)
     result = []
     for i, arr in enumerate(arrays, start=1):
-        reshaped = np.broadcast_to(arr[[...] + [np.newaxis] * (ndim - i)],
+        reshaped = np.broadcast_to(arr[(...,) + (np.newaxis,) * (ndim - i)],
                                    shape)
         result.append(reshaped)
     return result
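
The indexing change fixes a NumPy deprecation: since NumPy 1.15, multidimensional indexing with anything other than a tuple (here, the list [...] + [np.newaxis] * (ndim - i)) emits a FutureWarning; the tuple form is the supported spelling. A small standalone sketch:

    import numpy as np

    arr = np.arange(4)
    # arr[[..., np.newaxis]]  # list index: deprecated since NumPy 1.15
    reshaped = arr[(...,) + (np.newaxis,)]  # tuple index: supported
    print(reshaped.shape)  # (4, 1)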
1 change: 1 addition & 0 deletions test-requirements.txt
@@ -2,3 +2,4 @@ pytest>=3.0
 pytest-cov>=2.3
 coverage>=4.2
 hypothesis>=3.6
+pytest-faulthandler
