[WIP] communication protocol with BigDataViewer proofreading client #87

Merged
45 commits merged on Sep 21, 2016

Commits
dd42b56
Clean up gala.morpho imports
jni Jul 2, 2016
dd7c73e
Add compact watershed transform
jni Jul 2, 2016
db6f19a
Add new multiple separation message type
jni Jul 6, 2016
385e72e
Bug fixes on new multi-split protocol
jni Jul 6, 2016
41985a1
Allow null ID service; better config overrides
jni Jul 6, 2016
38ac9fe
Remap output IDs to unique ones from server
jni Jul 6, 2016
86d1f29
Add (passing) test with Solver using ID service
jni Jul 6, 2016
68c14e0
Use daemon threads for solver it server tests
jni Jul 7, 2016
749b3ec
Resolve when idle, and periodically
jni Jul 7, 2016
9a0e612
Fix parameters in compact_watershed
jni Jul 7, 2016
00bb8b7
Add multiscale grid seeding and related functions
jni Jul 7, 2016
921b191
Don't die if you don't recognize a message
jni Jul 7, 2016
f8c17cb
Revert "Use daemon threads for solver it server tests"
jni Jul 7, 2016
f90c563
Normalize labels before plotting data points in decision boundary
jni Jul 8, 2016
934f811
Ravel arrays to prevent shape warnings
jni Jul 8, 2016
4b1b416
Add print statements to server to diagnose ZMQ
jni Jul 8, 2016
5c92b7b
Sleep between bouts of proofreading to allow solver to catch up
jni Jul 8, 2016
4c71401
Send segmentations non-blocking in case proofreader is gone
jni Jul 8, 2016
c311ee8
Learn separations at a fragment level only
jni Jul 8, 2016
a1a282f
Don't replay merge history, use classifier
jni Jul 8, 2016
26ae97c
Starting ID can be consumed by 0 or boundary_body
jni Jul 8, 2016
85f92cd
Check whether original graph has edge between separated fragments
jni Jul 8, 2016
d840ad9
Add more diagnostic communication prints
jni Jul 8, 2016
18d8bcd
Bug fix: expecting wrong key for merges
jni Jul 8, 2016
00ad1cc
Don't re-solve if you only have one kind of label
jni Jul 8, 2016
9565cdf
Add new function to plot seed points on image
jni Jul 8, 2016
f121e14
Add tqdm as progressbar dependency
jni Jul 8, 2016
2d1013b
Bug fix: proofreader also needs merge keyword update
jni Jul 8, 2016
b12c871
Add pipelined compact watershed function
jni Jul 8, 2016
d502470
Remove silly call
jni Jul 8, 2016
07a8b5e
Bug fix: wrong shape of arrays
jni Jul 14, 2016
74ce960
Use skimage compact watershed if available
jni Aug 4, 2016
29dbd0d
Don't inherit from object now that we're Py3-only
jni Sep 19, 2016
29f6a05
Add class docstring for Solver
jni Sep 19, 2016
ccc71e5
Clean up ncut implementation
jni Sep 19, 2016
5982167
Remove MPL annotation finder
jni Sep 19, 2016
0201d81
Misc changes
jni Sep 20, 2016
dd10228
Move to conda-forge for env management
jni Sep 21, 2016
e825d6d
Create env quietly to minimise Travis noise
jni Sep 21, 2016
abbc914
Don't reinstall env packages with pip
jni Sep 21, 2016
b51c100
Update deprecated pytest section in setup.cfg
jni Sep 21, 2016
926df42
Add fix for scipy 0.18 which modifies COO inplace
jni Sep 21, 2016
fcc80a9
Add nose to dependencies
jni Sep 21, 2016
40d9aa3
Bump pytest requirement
jni Sep 21, 2016
cb70694
Remove unused functions and improve testing of evaluate
jni Sep 21, 2016
8 changes: 2 additions & 6 deletions .travis.yml
@@ -15,15 +15,11 @@ install:
   - conda update -q conda

   # try to create env, but ignore if it fails -- probably cached.
-  - conda env create || true
+  - conda env create -q || true
   - source activate gala
   - conda info -a

-  # custom package not available from conda
-  - pip install viridis
-
-  # install testing and coverage packages
-  - pip install pytest pytest-cov
+  # install coveralls for reporting
   - pip install coveralls

   # Install gala
44 changes: 23 additions & 21 deletions environment.yml
@@ -1,23 +1,25 @@
 name: gala
+channels:
+  - conda-forge
 dependencies:
-  - python=3.5*
-  - setuptools=19.6*
-  - cython=0.23*
-  - pytest=2.8*
-  - numpy=1.11*
-  - nose=1.3*
-  - numpydoc=0.5*
-  - h5py=2.6*
-  - matplotlib=1.5*
-  - scipy=0.17*
-  - scikit-learn=0.17*
-  - scikit-image=0.12*
-  - networkx=1.10*
-  - jupyter=1.0*
-  - pyzmq=15.*
-  - pip:
-    - coverage>=4.0
-    - pytest-cov>=2.2
-    - viridis>=0.4
-    - memory_profiler>=0.41
-    - line_profiler>=1.0
+  - python>=3.5*
+  - setuptools>=19.6*
+  - cython>=0.23*
+  - pytest>=3*
+  - nose>=1.3
+  - numpy>=1.11*
+  - numpydoc>=0.5*
+  - h5py>=2.6*
+  - matplotlib>=1.5*
+  - scipy>=0.17*
+  - scikit-learn>=0.17*
+  - scikit-image>=0.12*
+  - networkx>=1.10*
+  - jupyter>=1.0*
+  - pyzmq>=15.*
+  - coverage>=4.0
+  - pytest-cov>=2.2
+  - viridis>=0.4
+  - memory_profiler>=0.41
+  - line_profiler>=1.1
+  - tqdm>=4.7
40 changes: 28 additions & 12 deletions gala/agglo.py
@@ -12,6 +12,7 @@
 from numpy import (array, mean, zeros, zeros_like, where, unique,
                    newaxis, nonzero, median, float, ones, arange, inf, isnan,
                    flatnonzero, unravel_index, bincount)
+from tqdm import tqdm
 import numpy as np
 from scipy.stats import sem
 from scipy import sparse
@@ -978,6 +979,8 @@ def agglomerate(self, threshold=0.5, save_history=False):
         if self.merge_queue.is_empty():
             self.merge_queue = self.build_merge_queue()
         history, scores, evaluation = [], [], []
+        # total merges is number of nodes minus boundary_node minus one.
+        progress = tqdm(total=self.number_of_nodes() - 2)
         while len(self.merge_queue) > 0 and \
                 self.merge_queue.peek()[0] < threshold:
             merge_priority, _, n1, n2 = self.merge_queue.pop()
@@ -989,6 +992,8 @@
             evaluation.append(
                 (self.number_of_nodes()-1, self.split_vi())
                 )
+            progress.update(1)
+        progress.close()
         if save_history:
             return history, scores, evaluation
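Note: the progress bar added above follows the standard manual-update tqdm pattern: fix the total up front (a graph with a boundary node admits at most number_of_nodes() - 2 merges), tick once per merge, and close the bar even when the threshold stops the loop early. A minimal self-contained sketch of the same pattern, using a plain heap in place of gala's merge queue (names here are illustrative, not gala's API):

import heapq
from tqdm import tqdm

def drain_queue(priorities, threshold):
    # Stand-in for the agglomeration loop: pop merges in priority
    # order until the next candidate exceeds the stopping threshold.
    heapq.heapify(priorities)
    progress = tqdm(total=len(priorities))
    merged = []
    while priorities and priorities[0] < threshold:
        merged.append(heapq.heappop(priorities))
        progress.update(1)   # one tick per merge, as in the diff above
    progress.close()         # close explicitly even on an early stop
    return merged

drain_queue([0.1, 0.9, 0.4, 0.2], threshold=0.5)  # bar stops at 3 of 4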

@@ -2001,12 +2006,20 @@ def write_plaza_json(self, fout, synapsejson=None, offsetz=0):

     def ncut(self, num_clusters=10, kmeans_iters=5, sigma=255.0*20, nodes=None,
              **kwargs):
-        """Run normalized cuts on the current set of superpixels.
-        Keyword arguments:
-            num_clusters -- number of clusters to compute
-            kmeans_iters -- # iterations to run kmeans when clustering
-            sigma -- sigma value when setting up weight matrix
-        Return value: None
+        """Run normalized cuts on the current set of fragments.
+
+        Parameters
+        ----------
+        num_clusters : int, optional
+            The desired number of clusters
+        kmeans_iters : int, optional
+            The maximum number of iterations for the kmeans clustering
+            of the Laplacian eigenvectors.
+        sigma : float, optional
+            The damping factor on the edge weights. The higher this value,
+            the closer to 1 (the maximum) edges with large weights will be.
+        nodes : collection of int, optional
+            Restrict the ncut to the listed nodes.
         """
         if nodes is None:
             nodes = self.nodes()
@@ -2033,21 +2046,24 @@ def cluster_by_labels(self, labels, nodes=None):
                 self.merge_nodes(node1, node)


-    def compute_W(self, merge_priority_function, sigma=255.0*20, nodes=None):
-        """ Computes the weight matrix for clustering"""
+    def compute_W(self, distance_function, sigma=255.0*20, nodes=None):
+        """Compute the weight matrix for n-cut clustering.
+
+        See `ncut` for parameters.
+        """
         if nodes is None:
             nodes = array(self.nodes())
         n = len(nodes)
         nodes2ind = dict(zip(nodes, range(n)))
-        W = lil_matrix((n,n))
+        W = lil_matrix((n, n))
         for u, v in self.real_edges(nodes):
             try:
                 i, j = nodes2ind[u], nodes2ind[v]
             except KeyError:
                 continue
-            w = merge_priority_function(self, ((u, v)))
-            W[i,j] = W[j,i] = np.exp(-w**2/sigma)
-        return W
+            w = distance_function(self, (u, v))
+            W[i, j] = W[j, i] = np.exp(-w**2 / sigma)
+        return W.tocsr()


     def update_frozen_sets(self, n1, n2):
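Note: compute_W builds the affinity matrix for the normalized cut. The renamed distance_function argument returns a dissimilarity w per edge, and exp(-w**2 / sigma) converts it to an affinity in (0, 1]: with the default sigma = 255.0*20, a distance of 10 maps to about 0.98 while a distance of 255 maps to about 3e-6. A standalone sketch of the same weighting scheme (hypothetical names, not gala's API):

import numpy as np
from scipy.sparse import lil_matrix

def affinity_matrix(edges, n, distance, sigma=255.0 * 20):
    # Build a symmetric sparse affinity matrix over n nodes:
    # small distances give affinities near 1, large ones near 0.
    W = lil_matrix((n, n))
    for i, j in edges:
        w = distance((i, j))
        W[i, j] = W[j, i] = np.exp(-w ** 2 / sigma)
    return W.tocsr()  # CSR, as in the diff, for fast downstream arithmetic

W = affinity_matrix([(0, 1), (1, 2)], n=3, distance=lambda e: 100.0)

Returning CSR instead of LIL (the other change here) fits scipy's usual division of labor: LIL is meant for incremental construction, CSR for the arithmetic the eigendecomposition needs.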
2 changes: 1 addition & 1 deletion gala/agglo2.py
@@ -59,7 +59,7 @@ def sparse_boundaries(coo_boundaries):
     bounds : SparseLOL
         A map of edge indices to locations in the volume.
     """
-    edge_to_idx = coo_boundaries.tocsr()
+    edge_to_idx = coo_boundaries.copy().tocsr()
     # edge_to_idx: CSR matrix that maps each edge to a unique integer
     # we don't use the ID 0 so that empty spots can be used to mean "no ID".
     edge_to_idx.data = np.arange(1, len(edge_to_idx.data) + 1, dtype=np.int_)
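Note: this one-line change pairs with commit 926df42 ("Add fix for scipy 0.18 which modifies COO inplace"): converting a throwaway copy shields the caller's COO matrix from any in-place mutation during conversion. A minimal sketch of the defensive-copy idiom (the duplicate entry is just to give the conversion something to consolidate):

import numpy as np
from scipy import sparse

row = np.array([0, 0, 1])
col = np.array([0, 0, 1])   # duplicate (0, 0) entry on purpose
coo = sparse.coo_matrix((np.ones(3), (row, col)), shape=(2, 2))

csr = coo.copy().tocsr()    # convert the copy, not the original
assert len(coo.data) == 3   # the caller's buffers are untouched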
60 changes: 0 additions & 60 deletions gala/annotefinder.py

This file was deleted.

77 changes: 2 additions & 75 deletions gala/evaluate.py
@@ -42,79 +42,6 @@ def nzcol(mat, row_idx):
     return mat[row_idx].nonzero()[1]


-def sparse_min(mat, axis=None):
-    """Compute the minimum value in a sparse matrix (optionally over an axis).
-
-    This function mimics the numpy.min() API for sparse.CSC or CSR matrices.
-
-    Parameters
-    ----------
-    mat : a scipy.sparse csc or csr matrix
-        The matrix for which to compute the min.
-    axis : int in {0, 1}, optional
-        Compute the minimum over each column (`axis=0`) or over each row
-        (`axis=1`). By default, compute over entire matrix.
-
-    Returns
-    -------
-    mn : mat.dtype (if `axis=None`) or np.ndarray of shape (mat.shape[1-axis],)
-        The minimum value in the array or along an axis.
-    """
-    mn = - sparse_max(-mat, axis)
-    return mn
-
-
-def sparse_max(mat, axis=None):
-    """Compute the maximum value in a sparse matrix (optionally over an axis).
-
-    This function mimics the numpy.max() API for sparse.CSC or CSR matrices.
-
-    Parameters
-    ----------
-    mat : a scipy.sparse csc or csr matrix
-        The matrix for which to compute the max.
-    axis : int in {0, 1}, optional
-        Compute the maximum over each column (`axis=0`) or over each row
-        (`axis=1`). By default, compute over entire matrix.
-
-    Returns
-    -------
-    mx : mat.dtype (if `axis=None`) or np.ndarray of shape (mat.shape[1-axis],)
-        The maximum value in the array or along an axis.
-    """
-    if type(mat) == sparse.csr_matrix:
-        mat = mat.tocsc()
-    if axis is None:
-        mx = np.max(mat.data)
-    elif axis == 0:
-        mx = sparse_csr_row_max(mat.T)
-    elif axis == 1:
-        mx = sparse_csr_row_max(mat.tocsr())
-    else:
-        raise ValueError("Invalid axis %i for matrix (2 dimensional)." % axis)
-    return mx
-
-
-def sparse_csr_row_max(csr_mat):
-    """Compute maximum over each row of a CSR format sparse matrix.
-
-    Parameters
-    ----------
-    csr_mat : scipy.sparse.csr_matrix
-        The input matrix.
-
-    Returns
-    -------
-    mx : np.ndarray of shape `(mat.shape[0],)`
-        The maximum along every row.
-    """
-    ret = np.zeros(csr_mat.shape[0])
-    row_diff = np.diff(csr_mat.indptr)
-    ret[row_diff != 0] = np.maximum.reduceat(csr_mat.data,
-                                             csr_mat.indptr[:-1][row_diff > 0])
-    return ret


 def pixel_wise_boundary_precision_recall(pred, gt):
     """Evaluate voxel prediction accuracy against a ground truth.

@@ -1254,12 +1181,12 @@ def vi_tables(x, y=None, ignore_x=[0], ignore_y=[0]):

     # Calculate log conditional probabilities and entropies
     lpygx = np.zeros(np.shape(px))
-    lpygx[nzx] = xlogx(divide_rows(nzpxy, nzpx)).sum(axis=1)
+    lpygx[nzx] = xlogx(divide_rows(nzpxy, nzpx)).sum(axis=1).ravel()
                         # \sum_x{p_{y|x} \log{p_{y|x}}}
     hygx = -(px*lpygx)  # \sum_x{p_x H(Y|X=x)} = H(Y|X)

     lpxgy = np.zeros(np.shape(py))
-    lpxgy[nzy] = xlogx(divide_columns(nzpxy, nzpy)).sum(axis=0)
+    lpxgy[nzy] = xlogx(divide_columns(nzpxy, nzpy)).sum(axis=0).ravel()
     hxgy = -(py*lpxgy)

     return [pxy] + list(map(np.asarray, [px, py, hxgy, hygx, lpygx, lpxgy]))
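Note: the two .ravel() calls implement commit 934f811 ("Ravel arrays to prevent shape warnings"): when the intermediate probability table is a np.matrix, summing along an axis keeps the result 2-D, and assigning that into a 1-D slice triggers shape warnings. A minimal reproduction of the idea, assuming a matrix-typed intermediate as in gala's contingency tables:

import numpy as np

pxy = np.matrix([[0.2, 0.3], [0.1, 0.4]])
row_sums = pxy.sum(axis=1)            # np.matrix of shape (2, 1): still 2-D
lpygx = np.zeros(2)
lpygx[[0, 1]] = np.ravel(row_sums)    # shape (2,): assigns cleanly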