Skip to content

Commit

Permalink
making test pass -- not very clean nn function
Browse files Browse the repository at this point in the history
  • Loading branch information
nperraud committed Jul 26, 2019
1 parent ddc290e commit 9ad6bff
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 172 deletions.
6 changes: 4 additions & 2 deletions pygsp/_nearest_neighbor.py
Expand Up @@ -6,6 +6,8 @@
from scipy import sparse, spatial
from pygsp import utils

_logger = utils.build_logger(__name__)

def _scipy_pdist(features, metric, order, kind, k, radius, params):
if params:
raise ValueError('unexpected parameters {}'.format(params))
Expand Down Expand Up @@ -211,12 +213,13 @@ def nearest_neighbor(features, metric='euclidean', order=2, kind='knn', k=10, ra
return neighbors, distances


def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None):
def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, kind = None, k=None):
'''Build a sparse distance matrix from nearest neighbors'''
n_edges = [len(n) - 1 for n in neighbors] # remove distance to self
if safe and kind is None:
raise ValueError('Please specify "kind" to "knn" or "radius" to use the safe mode')

n_vertices = len(n_edges)
if safe and kind == 'radius':
n_disconnected = np.sum(np.asarray(n_edges) == 0)
if n_disconnected > 0:
Expand All @@ -228,7 +231,6 @@ def sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=False, ki
row = np.empty_like(value, dtype=np.int)
col = np.empty_like(value, dtype=np.int)
start = 0
n_vertices = len(n_edges)
for vertex in range(n_vertices):
if safe and kind == 'knn':
assert n_edges[vertex] == k
Expand Down
170 changes: 4 additions & 166 deletions pygsp/graphs/nngraphs/nngraph.py
Expand Up @@ -9,144 +9,12 @@

from pygsp import utils
from pygsp.graphs import Graph # prevent circular import in Python < 3.5
from pygsp._nearest_neighbor import nearest_neighbor, sparse_distance_matrix


_logger = utils.build_logger(__name__)


def _scipy_pdist(features, metric, order, kind, k, radius, params):
if params:
raise ValueError('unexpected parameters {}'.format(params))
metric = 'cityblock' if metric == 'manhattan' else metric
metric = 'chebyshev' if metric == 'max_dist' else metric
params = dict(metric=metric)
if metric == 'minkowski':
params['p'] = order
dist = spatial.distance.pdist(features, **params)
dist = spatial.distance.squareform(dist)
if kind == 'knn':
neighbors = np.argsort(dist)[:, :k+1]
distances = np.take_along_axis(dist, neighbors, axis=-1)
elif kind == 'radius':
distances = []
neighbors = []
for distance in dist:
neighbor = np.flatnonzero(distance < radius)
neighbors.append(neighbor)
distances.append(distance[neighbor])
return neighbors, distances


def _scipy_kdtree(features, _, order, kind, k, radius, params):
if order is None:
raise ValueError('invalid metric for scipy-kdtree')
eps = params.pop('eps', 0)
tree = spatial.KDTree(features, **params)
params = dict(p=order, eps=eps)
if kind == 'knn':
params['k'] = k + 1
elif kind == 'radius':
params['k'] = None
params['distance_upper_bound'] = radius
distances, neighbors = tree.query(features, **params)
return neighbors, distances


def _scipy_ckdtree(features, _, order, kind, k, radius, params):
if order is None:
raise ValueError('invalid metric for scipy-kdtree')
eps = params.pop('eps', 0)
tree = spatial.cKDTree(features, **params)
params = dict(p=order, eps=eps, n_jobs=-1)
if kind == 'knn':
params['k'] = k + 1
elif kind == 'radius':
params['k'] = features.shape[0] # number of vertices
params['distance_upper_bound'] = radius
distances, neighbors = tree.query(features, **params)
if kind == 'knn':
return neighbors, distances
elif kind == 'radius':
dist = []
neigh = []
for distance, neighbor in zip(distances, neighbors):
mask = (distance != np.inf)
dist.append(distance[mask])
neigh.append(neighbor[mask])
return neigh, dist


def _flann(features, metric, order, kind, k, radius, params):
if metric == 'max_dist':
raise ValueError('flann gives wrong results for metric="max_dist".')
try:
import cyflann as cfl
except Exception as e:
raise ImportError('Cannot import cyflann. Choose another nearest '
'neighbors backend or try to install it with '
'pip (or conda) install cyflann. '
'Original exception: {}'.format(e))
cfl.set_distance_type(metric, order=order)
index = cfl.FLANNIndex()
index.build_index(features, **params)
# I tried changing the algorithm and testing performance on huge matrices,
# but the default parameters seems to work best.
if kind == 'knn':
neighbors, distances = index.nn_index(features, k+1)
if metric == 'euclidean':
np.sqrt(distances, out=distances)
elif metric == 'minkowski':
np.power(distances, 1/order, out=distances)
elif kind == 'radius':
distances = []
neighbors = []
if metric == 'euclidean':
radius = radius**2
elif metric == 'minkowski':
radius = radius**order
n_vertices, _ = features.shape
for vertex in range(n_vertices):
neighbor, distance = index.nn_radius(features[vertex, :], radius)
distances.append(distance)
neighbors.append(neighbor)
if metric == 'euclidean':
distances = list(map(np.sqrt, distances))
elif metric == 'minkowski':
distances = list(map(lambda d: np.power(d, 1/order), distances))
index.free_index()
return neighbors, distances


def _nmslib(features, metric, order, kind, k, _, params):
if kind == 'radius':
raise ValueError('nmslib does not support kind="radius".')
if metric == 'minkowski':
raise ValueError('nmslib does not support metric="minkowski".')
try:
import nmslib as nms
except Exception as e:
raise ImportError('Cannot import nmslib. Choose another nearest '
'neighbors backend or try to install it with '
'pip (or conda) install nmslib. '
'Original exception: {}'.format(e))
n_vertices, _ = features.shape
params_index = params.pop('index', None)
params_query = params.pop('query', None)
metric = 'l2' if metric == 'euclidean' else metric
metric = 'l1' if metric == 'manhattan' else metric
metric = 'linf' if metric == 'max_dist' else metric
index = nms.init(space=metric, **params)
index.addDataPointBatch(features)
index.createIndex(params_index)
if params_query is not None:
index.setQueryTimeParams(params_query)
results = index.knnQueryBatch(features, k=k+1)
neighbors, distances = zip(*results)
distances = np.concatenate(distances).reshape(n_vertices, k+1)
neighbors = np.concatenate(neighbors).reshape(n_vertices, k+1)
return neighbors, distances


class NNGraph(Graph):
r"""Nearest-neighbor graph.
Expand Down Expand Up @@ -437,39 +305,9 @@ def __init__(self, features, standardize=False,
else:
raise ValueError('Invalid kind "{}".'.format(kind))

try:
function = globals()['_' + backend.replace('-', '_')]
except KeyError:
raise ValueError('Invalid backend "{}".'.format(backend))
neighbors, distances = function(features, metric, order,
kind, k, radius, kwargs)

n_edges = [len(n) - 1 for n in neighbors] # remove distance to self

if kind == 'radius':
n_disconnected = np.sum(np.asarray(n_edges) == 0)
if n_disconnected > 0:
_logger.warning('{} vertices (out of {}) are disconnected. '
'Consider increasing the radius or setting '
'kind=knn.'.format(n_disconnected, n_vertices))

value = np.empty(sum(n_edges), dtype=np.float)
row = np.empty_like(value, dtype=np.int)
col = np.empty_like(value, dtype=np.int)
start = 0
for vertex in range(n_vertices):
if kind == 'knn':
assert n_edges[vertex] == k
end = start + n_edges[vertex]
value[start:end] = distances[vertex][1:]
row[start:end] = np.full(n_edges[vertex], vertex)
col[start:end] = neighbors[vertex][1:]
start = end
W = sparse.csr_matrix((value, (row, col)), (n_vertices, n_vertices))

# Enforce symmetry. May have been broken by k-NN. Checking symmetry
# with np.abs(W - W.T).sum() is as costly as the symmetrization itself.
W = utils.symmetrize(W, method='fill')
neighbors, distances = nearest_neighbor(features, metric=metric, order=order,
kind=kind, k=k, radius=radius, backend=backend, **kwargs)
W = sparse_distance_matrix(neighbors, distances, symmetrize=True, safe=True, kind = kind, k=k)

if kernel_width is None:
if kind == 'knn':
Expand Down
7 changes: 4 additions & 3 deletions pygsp/tests/test_graphs.py
Expand Up @@ -354,11 +354,12 @@ def test_nngraph(self, n_vertices=24):
Graph = graphs.NNGraph
data = np.random.RandomState(42).uniform(size=(n_vertices, 3))
metrics = ['euclidean', 'manhattan', 'max_dist', 'minkowski']
backends = ['scipy-kdtree', 'scipy-ckdtree', 'flann', 'nmslib']
# Not testing , 'flann', 'nmslib' as they are tested in test_nearest_neighbor
backends = ['scipy-kdtree', 'scipy-ckdtree']

for metric in metrics:
for kind in ['knn', 'radius']:
params = dict(features=data, metric=metric, kind=kind, k=4)
params = dict(features=data, metric=metric, kind=kind, k=6)
ref = Graph(backend='scipy-pdist', **params)
for backend in backends:
# Unsupported combinations.
Expand Down Expand Up @@ -561,4 +562,4 @@ def test_grid2dimgpatches(self):
graphs.Grid2dImgPatches(img=self._img, patch_shape=(3, 3))


suite = unittest.TestLoader().loadTestsFromTestCase(TestCase)
suite = unittest.TestLoader().loadTestsFromTestCase(TestCase)
3 changes: 2 additions & 1 deletion pygsp/tests/test_nearest_neighbor.py
Expand Up @@ -11,7 +11,7 @@ def test_nngraph(self, n_vertices=100):
for metric in metrics:
for kind in ['knn', 'radius']:
for backend in backends:
params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=10)
params = dict(features=data, metric=metric, kind=kind, radius=0.25, k=8)

ref_nn, ref_d = nearest_neighbor(backend='scipy-pdist', **params)
# Unsupported combinations.
Expand Down Expand Up @@ -76,3 +76,4 @@ def test_sparse_distance_matrix(self):


suite = unittest.TestLoader().loadTestsFromTestCase(TestCase)

0 comments on commit 9ad6bff

Please sign in to comment.