Skip to content

Commit

Permalink
Added function to convert Bob's 4col and 5col score files to OpenBR m…
Browse files Browse the repository at this point in the history
…atrices
  • Loading branch information
siebenkopf committed Sep 17, 2015
1 parent 58802bc commit fb45722
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 4 deletions.
1 change: 1 addition & 0 deletions bob/measure/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from . import plot
from . import load
from . import calibration
from . import openbr
import numpy

def mse (estimation, target):
Expand Down
5 changes: 5 additions & 0 deletions bob/measure/data/scores.mask
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
S2
unknown-gallery.lst
unknown-probe.lst
MB 100 20 xV4
˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙˙
Binary file added bob/measure/data/scores.mtx
Binary file not shown.
119 changes: 119 additions & 0 deletions bob/measure/openbr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
"""This file includes functionality to convert between Bob's four column or five column score files and the Matrix files used in OpenBR."""

import numpy
import logging
logger = logging.getLogger("bob.measure")

from .load import open_file, four_column, five_column

def write_matrix(
    score_file,
    matrix_file,
    mask_file,
    model_names = None,
    probe_names = None,
    score_file_format = '4column',
    gallery_file_name = 'unknown-gallery.lst',
    probe_file_name = 'unknown-probe.lst'
):
    """Writes the OpenBR matrix and mask files (version 2), given the score file.

    The score matrix has one row per probe and one column per model.  Elements
    for which the score file contains no entry stay ``NaN`` in the matrix and
    ``0x00`` in the mask.  Scored elements are marked ``0xff`` in the mask when
    the client id equals the claimed id of the score line, and ``0x7f``
    otherwise.

    If model and probe names are provided, the matrices in both files will be
    ordered by these names.  A list that is not provided is estimated from the
    score file, in order of first appearance.

    .. warning::

       When provided with a 4-column score file, this function will work only
       if there is a single model id for each client.

    Keyword parameters:

    score_file : str
      The 4 or 5 column style score file written by bob.

    matrix_file : str
      The OpenBR matrix file that should be written.
      Usually, the file name extension is ``.mtx``.

    mask_file : str
      The OpenBR mask file that should be written.
      The mask file defines which values are positives, negatives or to be
      ignored.

    model_names : [str] or ``None``
      If given, the matrix will be written in the same order as the given
      model names.  The model names must be identical with the second column
      of the 5-column ``score_file``.

      .. note::

         If the score file is in four column format, the model names must be
         the client ids stored in the first column.  In this case, there
         might be only a single model per client.

      Only the scores of the given models will be considered; score lines of
      other models are skipped.

    probe_names : [str] or ``None``
      If given, the matrix will be written in the same order as the given
      probe names (the path of the probe).  The probe names are taken from
      the third column of a 4-column (fourth column of a 5-column)
      ``score_file``.  Only the scores of the given probe names will be
      considered; score lines of other probes are skipped.

    score_file_format : one of ``('4column', '5column')``
      The format of the ``score_file``.  Any other value raises a
      ``KeyError``.

    gallery_file_name : str
      The name of the gallery file that will be written in the header of the
      OpenBR files.

    probe_file_name : str
      The name of the probe file that will be written in the header of the
      OpenBR files.
    """

    def _write_matrix(filename, matrix):
        # Helper function to write a matrix (or mask) file as required by OpenBR.
        type_code = 'B' if matrix.dtype == numpy.uint8 else 'F'
        header = "S2\n%s\n%s\nM%s %d %d " % (
            gallery_file_name, probe_file_name,
            type_code, matrix.shape[0], matrix.shape[1],
        )
        # The file is opened in binary mode, so only bytes may be written.
        # Under Python 2 a plain ``str`` already is ``bytes`` and is left as is.
        if not isinstance(header, bytes):
            header = header.encode('utf-8')
        with open(filename, 'wb') as f:
            # first four header lines (the fourth one is completed below)
            f.write(header)
            # magic number, written in the byte order of this machine
            numpy.array(0x12345678, numpy.int32).tofile(f)
            f.write(b"\n")
            # the raw matrix data
            matrix.tofile(f)

    # define the read function and where the model column is located
    read_function = {'4column' : four_column, '5column' : five_column}[score_file_format]
    offset = {'4column' : 0, '5column' : 1}[score_file_format]

    # Estimate ONLY the name lists that were not given; a list provided by the
    # caller must never be overwritten.
    estimate_models = model_names is None
    estimate_probes = probe_names is None
    if estimate_models or estimate_probes:
        if estimate_models:
            model_names = []
        if estimate_probes:
            probe_names = []
        seen_models, seen_probes = set(), set()

        # read the score file, keeping names in order of first appearance
        for line in read_function(score_file):
            model, probe = line[offset], line[2 + offset]
            if estimate_models and model not in seen_models:
                seen_models.add(model)
                model_names.append(model)
            if estimate_probes and probe not in seen_probes:
                seen_probes.add(probe)
                probe_names.append(probe)

    # shortcuts to get the matrix indices of models and probes (to increase speed)
    model_dict = {m: i for i, m in enumerate(model_names)}
    probe_dict = {p: i for i, p in enumerate(probe_names)}

    # create the matrices in the desired size; NaN marks "no score available"
    matrix = numpy.full((len(probe_names), len(model_names)), numpy.nan, numpy.float32)
    mask = numpy.zeros(matrix.shape, numpy.uint8)

    # now, iterate through the score file and fill in the matrix
    for line in read_function(score_file):
        client, model, claimed_id, probe, score = (
            line[0], line[offset], line[1 + offset], line[2 + offset], line[3 + offset]
        )

        # only the scores of the requested models and probes are considered
        if model not in model_dict or probe not in probe_dict:
            continue

        model_index = model_dict[model]
        probe_index = probe_dict[probe]

        # check if we have already written something into that matrix element
        if mask[probe_index, model_index]:
            logger.warning("Overwriting existing matrix '%f' element of client '%s' and probe '%s' with '%f'", matrix[probe_index, model_index], client, probe, score)

        matrix[probe_index, model_index] = score
        # 0xff: client and claimed id agree; 0x7f: they differ
        mask[probe_index, model_index] = 0xff if client == claimed_id else 0x7f

    # OK, now finally write the files in the desired format
    _write_matrix(mask_file, mask)
    _write_matrix(matrix_file, matrix)
54 changes: 50 additions & 4 deletions bob/measure/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@
"""Tests the IO functionality of bob.measure."""

import bob.measure
import pkg_resources
import tempfile, os, shutil

import bob.io.base.test_utils

def test_load_scores():
# This function tests the IO functionality of loading score files in different ways
Expand All @@ -18,18 +20,62 @@ def test_load_scores():
cols = {'4col' : 4, '5col' : 5}

for variant in ('4col', '5col'):

# read score file in normal way
normal_score_file = pkg_resources.resource_filename('bob.measure', 'data/dev-%s.txt' % variant)
normal_score_file = bob.io.base.test_utils.datafile('dev-%s.txt' % variant, 'bob.measure')
normal_scores = list(load_functions[variant](normal_score_file))

assert len(normal_scores) == 910
assert all(len(s) == cols[variant] for s in normal_scores)

# read the compressed score file
compressed_score_file = pkg_resources.resource_filename('bob.measure', 'data/dev-%s.tar.gz' % variant)
compressed_score_file = bob.io.base.test_utils.datafile('dev-%s.tar.gz' % variant, 'bob.measure')
compressed_scores = list(load_functions[variant](compressed_score_file))

assert len(compressed_scores) == len(normal_scores)
assert all(len(c) == cols[variant] for c in compressed_scores)
assert all(c[i] == s[i] for c,s in zip(compressed_scores, normal_scores) for i in range(cols[variant]))


def _check_binary_identical(name1, name2):
    """Asserts that the two given files have byte-identical content.

    Both files are read completely and their MD5 digests are compared, see:
    http://www.peterbe.com/plog/using-md5-to-check-equality-between-files

    name1, name2 : str
      The paths of the two files to compare.

    Raises an ``AssertionError`` if the digests differ.
    """
    # ``hashlib`` replaces the ``md5`` module, which exists in Python 2 only
    import hashlib
    # open in binary mode: text mode would decode the data and translate newlines
    with open(name1, 'rb') as f1, open(name2, 'rb') as f2:
        assert hashlib.md5(f1.read()).digest() == hashlib.md5(f2.read()).digest()


def test_convert_openbr():
    """Tests that the conversion of score files to the OpenBR format works.

    For both the 4-column and the 5-column score file, the matrix and mask
    files are written twice: first with the model and probe names obtained
    automatically from the score file, then with explicitly given names.
    Each result must be binary identical to the reference files.
    """
    temp_dir = tempfile.mkdtemp(prefix='bob_test')

    # define output files; format only the file name, so that a '%' inside
    # the temporary directory path cannot break the string formatting
    openbr_extensions = ('.mtx', '.mask')
    matrix_file, mask_file = [os.path.join(temp_dir, "scores%s" % ext) for ext in openbr_extensions]

    try:
        # reference files (which are tested to work and give the same results with OpenBR)
        matrix_ref, mask_ref = [bob.io.base.test_utils.datafile('scores%s' % ext, 'bob.measure') for ext in openbr_extensions]

        for variant in ('4col', '5col'):
            # get score file
            score_file = bob.io.base.test_utils.datafile('scores-cmc-%s.txt' % variant, 'bob.measure')

            # first round, do not define keyword arguments -- let the function get the gallery and probe ids automatically
            kwargs = {}
            for _ in range(2):
                bob.measure.openbr.write_matrix(score_file, matrix_file, mask_file, score_file_format = "%sumn" % variant, **kwargs)

                assert os.path.isfile(matrix_file) and os.path.isfile(mask_file)

                # check that they are binary identical to the reference files
                _check_binary_identical(matrix_file, matrix_ref)
                _check_binary_identical(mask_file, mask_ref)

                # define new kwargs for second round, i.e., define model and probe names
                # these names are identical to what is found in the score file, which in turn comes from the AT&T database
                model_type = {"4col" : "%d", "5col" : "s%d"}[variant]
                dev_ids = (3,4,7,8,9,13,15,18,19,22,23,25,28,30,31,32,35,37,38,40)
                kwargs['model_names'] = [model_type % c for c in dev_ids]
                kwargs['probe_names'] = ["s%d/%d" % (c, i) for c in dev_ids for i in (1,3,6,8,10)]

    finally:
        # always remove the temporary files, also when an assertion failed
        shutil.rmtree(temp_dir)

0 comments on commit fb45722

Please sign in to comment.