Skip to content

Commit

Permalink
Added an algorithm that applies a simple distance function
Browse files Browse the repository at this point in the history
  • Loading branch information
siebenkopf committed Feb 11, 2016
1 parent fb9964b commit f41083f
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 15 deletions.
110 changes: 110 additions & 0 deletions bob/bio/base/algorithm/Distance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Gunther <siebenkopf@googlemail.com>

import numpy
import scipy.spatial

from .Algorithm import Algorithm

import logging
logger = logging.getLogger("bob.bio.base")

class Distance (Algorithm):
    """Scores features with a simple distance (or similarity) function.

    Independent of its actual shape, each feature is flattened into a one-dimensional vector, and the configured ``distance_function`` is applied to pairs of such vectors.
    If ``distance_function`` computes a distance, its negative value is returned (all :py:class:`Algorithm`'s are supposed to return similarity values, i.e., higher is better).
    If ``distance_function`` already computes a similarity, its value is returned unaltered.

    **Parameters:**

    ``distance_function`` : callable
      A function taking two 1D arrays and returning a ``float``

    ``is_distance_function`` : bool
      Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better)

    ``kwargs`` : ``key=value`` pairs
      A list of keyword arguments directly passed to the :py:class:`Algorithm` base class constructor.
    """

    def __init__(
        self,
        distance_function = scipy.spatial.distance.euclidean,
        is_distance_function = True,
        **kwargs  # parameters directly sent to the base class
    ):

        # call base class constructor; the callable is handed over as its string
        # representation, and no projection-related options are passed from here
        Algorithm.__init__(
            self,
            distance_function = str(distance_function),
            is_distance_function = is_distance_function,

            **kwargs
        )

        self.distance_function = distance_function
        # distances are negated so that higher scores always mean "more similar"
        self.factor = -1. if is_distance_function else 1.

    def _check_feature(self, feature):
        """Checks that the given feature is a :py:class:`numpy.ndarray`.

        **Raises:**

        ``ValueError``
          If ``feature`` is not a :py:class:`numpy.ndarray`.
        """
        if not isinstance(feature, numpy.ndarray):
            raise ValueError("The given feature should be of type numpy.ndarray")

    def enroll(self, enroll_features):
        """enroll(enroll_features) -> model

        Enrolls the model by storing all given input vectors.

        **Parameters:**

        ``enroll_features`` : [:py:class:`numpy.ndarray`]
          The list of features to enroll the model from; at least one feature is required.

        **Returns:**

        ``model`` : 2D :py:class:`numpy.ndarray`
          The enrolled model, containing one flattened feature per row.

        **Raises:**

        ``ValueError``
          If ``enroll_features`` is empty, or if any feature is not a :py:class:`numpy.ndarray`.
        """
        # explicit check instead of ``assert``, which is stripped when running with -O;
        # ``len`` is used (rather than truthiness) so that array-like inputs work, too
        if len(enroll_features) == 0:
            raise ValueError("At least one feature is required to enroll a model")
        for feature in enroll_features:
            self._check_feature(feature)
        # just store all the features, one flattened feature per row
        return numpy.vstack([feature.flatten() for feature in enroll_features])

    def score(self, model, probe):
        """score(model, probe) -> float

        Computes the similarity of the model to the probe using the distance function specified in the constructor.

        **Parameters:**

        ``model`` : 2D :py:class:`numpy.ndarray`
          The model storing all enrollment features

        ``probe`` : :py:class:`numpy.ndarray`
          The probe feature vector

        **Returns:**

        ``score`` : float
          A similarity value between ``model`` and ``probe``

        **Raises:**

        ``ValueError``
          If ``probe`` is not a :py:class:`numpy.ndarray`.
        """
        self._check_feature(probe)
        probe = probe.flatten()
        if model.ndim == 2:
            # the model holds several enrollment features (one per row);
            # delegate to the multiple model scoring
            return self.score_for_multiple_models(model, probe)
        else:
            # single model, single probe (multiple probes have already been handled);
            # ``self.factor`` turns a distance into a similarity value
            return self.factor * self.distance_function(model, probe)

    # re-define unused functions, just so that they do not get documented
    def train_projector(*args,**kwargs): raise NotImplementedError()
    def load_projector(*args,**kwargs): pass
    def project(*args,**kwargs): raise NotImplementedError()
    def write_feature(*args,**kwargs): raise NotImplementedError()
    def read_feature(*args,**kwargs): raise NotImplementedError()
    def train_enroller(*args,**kwargs): raise NotImplementedError()
    def load_enroller(*args,**kwargs): pass
1 change: 1 addition & 0 deletions bob/bio/base/algorithm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .Algorithm import Algorithm
from .Distance import Distance
from .PCA import PCA
from .LDA import LDA
from .PLDA import PLDA
Expand Down
9 changes: 9 additions & 0 deletions bob/bio/base/config/algorithm/distance_cosine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env python

import bob.bio.base
import scipy.spatial

# Algorithm resource: plain distance scoring based on scipy's cosine distance.
# The function is flagged as a distance, so the algorithm returns its negative as score.
algorithm = bob.bio.base.algorithm.Distance(
    is_distance_function=True,
    distance_function=scipy.spatial.distance.cosine,
)
9 changes: 9 additions & 0 deletions bob/bio/base/config/algorithm/distance_euclidean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/usr/bin/env python

import bob.bio.base
import scipy.spatial

# Algorithm resource: plain distance scoring based on scipy's Euclidean distance.
# The function is flagged as a distance, so the algorithm returns its negative as score.
algorithm = bob.bio.base.algorithm.Distance(
    is_distance_function=True,
    distance_function=scipy.spatial.distance.euclidean,
)
38 changes: 24 additions & 14 deletions bob/bio/base/test/test_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,30 @@ def _compare(data, reference, write_function = bob.bio.base.save, read_function
assert numpy.allclose(data, read_function(reference), atol=1e-5)


def _gmm_stats(self, feature_file, count = 50, minimum = 0, maximum = 1):
    """Generates a random, reproducible set of GMM-Stats features.

    A ``count`` x ``count`` nested list is built (outer list: one entry per identity,
    inner list: the stats of that identity).  Each element is a
    ``bob.learn.em.GMMStats`` constructed from ``feature_file``, whose ``sum_px`` and
    ``sum_pxx`` are replaced by uniform random values scaled to ``[minimum, maximum)``.
    ``self`` is not used.
    """
    # fixed seed, so that every call produces the same sequence
    numpy.random.seed(42)
    hdf5 = bob.io.base.HDF5File(feature_file)
    value_range = maximum - minimum

    def _random_stats():
        # create the stats from the file, then overwrite both accumulators with
        # random values (sum_px is drawn first, then sum_pxx)
        stats = bob.learn.em.GMMStats(hdf5)
        stats.sum_px = numpy.random.random(stats.sum_px.shape) * value_range + minimum
        stats.sum_pxx = numpy.random.random(stats.sum_pxx.shape) * value_range + minimum
        return stats

    return [[_random_stats() for _ in range(count)] for _ in range(count)]
def test_distance():
    """Tests the two registered distance algorithms (euclidean and cosine)."""

    # euclidean distance: the resource must load as a Distance algorithm ...
    euclidean = bob.bio.base.load_resource("distance-euclidean", "algorithm", preferred_package = 'bob.bio.base')
    for expected_class in (bob.bio.base.algorithm.Distance, bob.bio.base.algorithm.Algorithm):
        assert isinstance(euclidean, expected_class)
    # ... and none of the projection / training flags may be set
    for flag in (
        "performs_projection",
        "requires_projector_training",
        "use_projected_features_for_enrollment",
        "split_training_features_by_client",
        "requires_enroller_training",
    ):
        assert not getattr(euclidean, flag)

    # two 20x10 features that differ by exactly 1 in each of their 200 entries
    ones = numpy.ones((20,10), numpy.float64)
    twos = numpy.ones((20,10), numpy.float64) * 2.

    # the euclidean distance is sqrt(200), and the score is its negative
    model = euclidean.enroll([ones, ones])
    score = euclidean.score_for_multiple_probes(model, [twos, twos])
    assert abs(score + math.sqrt(200.)) < 1e-6, score

    # cosine distance: both features point into the same direction, so the score is 0
    cosine = bob.bio.base.load_resource("distance-cosine", "algorithm", preferred_package = 'bob.bio.base')
    model = cosine.enroll([ones, ones])
    score = cosine.score_for_multiple_probes(model, [twos, twos])
    assert abs(score) < 1e-8, score


def test_pca():
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@

'bob.bio.algorithm': [
'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only
'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm',
'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm',
'pca = bob.bio.base.config.algorithm.pca:algorithm',
'lda = bob.bio.base.config.algorithm.lda:algorithm',
'pca+lda = bob.bio.base.config.algorithm.pca_lda:algorithm',
Expand Down
2 changes: 1 addition & 1 deletion version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.0.5b0
2.0.5b1

0 comments on commit f41083f

Please sign in to comment.