diff --git a/bob/measure/data/nonsep-epc.hdf5 b/bob/measure/data/nonsep-epc.hdf5 index c94b2c1..68dae5f 100644 Binary files a/bob/measure/data/nonsep-epc.hdf5 and b/bob/measure/data/nonsep-epc.hdf5 differ diff --git a/bob/measure/error.h b/bob/measure/error.h index 727ea41..44ed314 100644 --- a/bob/measure/error.h +++ b/bob/measure/error.h @@ -13,7 +13,6 @@ #include #include #include -#include namespace bob { namespace measure { @@ -63,7 +62,7 @@ namespace bob { namespace measure { * or "client"). 'negatives' holds the score information for samples that are * labeled *not* to belong to the class (a.k.a., "noise" or "impostor"). * - * For more precise details about how the method considers error rates, please refer to the documentation of the method bob.measure.farfrr. + * For more precise details about how the method considers error rates, please refer to the documentation of the method bob.measure.farfrr. * * It is possible that scores are inverted in the negative/positive sense. In * some setups the designer may have setup the system so 'positive' samples @@ -108,6 +107,61 @@ namespace bob { namespace measure { return blitz::Array(negatives < threshold); } + /** + * Recursively minimizes w.r.t. to the given predicate method. Please refer + * to minimizingThreshold() for a full explanation. This method is only + * supposed to be used through that method. + */ + template + static double recursive_minimization(const blitz::Array& negatives, + const blitz::Array& positives, T& predicate, + double min, double max, size_t steps) { + static const double QUIT_THRESHOLD = 1e-10; + const double diff = max - min; + const double too_small = std::abs(diff/max); + + //if the difference between max and min is too small, we quit. + if ( too_small < QUIT_THRESHOLD ) return min; //or max, does not matter... + + double step_size = diff/(double)steps; + double min_value = predicate(1.0, 0.0); ///< to the left of the range + + //the accumulator holds the thresholds that given the minimum value for the + //input predicate. + std::vector accumulator; + accumulator.reserve(steps); + + for (size_t i=0; i ratios = + farfrr(negatives, positives, threshold); + + double current_cost = predicate(ratios.first, ratios.second); + + if (current_cost < min_value) { + min_value = current_cost; + accumulator.clear(); ///< clean-up, we got a better minimum + accumulator.push_back(threshold); ///< remember this threshold + } + else if (std::abs(current_cost - min_value) < 1e-16) { + //accumulate to later decide... + accumulator.push_back(threshold); + } + } + + //we stop when it doesn't matter anymore to threshold. + if (accumulator.size() != steps) { + //still needs some refinement: pick-up the middle of the range and go + return recursive_minimization(negatives, positives, predicate, + accumulator[accumulator.size()/2]-step_size, + accumulator[accumulator.size()/2]+step_size, + steps); + } + + return accumulator[accumulator.size()/2]; + } + /** * This method can calculate a threshold based on a set of scores (positives * and negatives) given a certain minimization criteria, input as a @@ -124,96 +178,30 @@ namespace bob { namespace measure { * Please note that this method will only work with single-minimum smooth * predicates. * - * The minimization is carried out in a data-driven way. - * First, it sorts the positive and negative scores. - * Starting from the lowest score (might be a positive or a negative), it - * increases the threshold based on the distance between the current score - * and the following higher score (also keeping track of duplicate scores) - * and computes the predicate for each possible threshold. + * The minimization is carried out in a recursive manner. First, we identify + * the threshold that minimizes the predicate given a set of N (N=100) + * thresholds between the min(negatives, positives) and the max(negatives, + * positives). If the minimum lies in a range of values, the center value is + * picked up. * - * Finally, that threshold is returned, for which the predicate returned the - * lowest value. + * In a second round of minimization new minimum and maximum bounds are + * defined based on the center value plus/minus the step (max-min/N) and a + * new minimization round is restarted for N samples within the new bounds. + * + * The procedure continues until all calculated predicates in a given round + * give the same minimum. At this point, the center threshold is picked up and + * returned. */ - template - double minimizingThreshold(const blitz::Array& negatives, const blitz::Array& positives, T& predicate){ - // sort negative and positive scores ascendingly - std::vector negatives_(negatives.extent(0)); - std::copy(negatives.begin(), negatives.end(), negatives_.begin()); - std::sort(negatives_.begin(), negatives_.end(), std::less()); - - std::vector positives_(positives.extent(0)); - std::copy(positives.begin(), positives.end(), positives_.begin()); - std::sort(positives_.begin(), positives_.end(), std::less()); - - // iterate over the whole set of points - std::vector::const_iterator pos_it = positives_.begin(), neg_it = negatives_.begin(); - - // iterate over all possible far and frr points and compute the predicate for each possible threshold... - double min_predicate = 1e8; - double min_threshold = 1e8; - double current_predicate = 1e8; - // we start with the extreme values for far and frr - double far = 1., frr = 0.; - // the decrease/increase for far/frr when moving one negative/positive - double far_decrease = 1./negatives_.size(), frr_increase = 1./positives_.size(); - // we start with the threshold based on the minimum score - double current_threshold = std::min(*pos_it, *neg_it); - // now, iterate over both lists, in a sorted order - while (pos_it != positives_.end() && neg_it != negatives_.end()){ - // compute predicate - current_predicate = predicate(far, frr); - if (current_predicate <= min_predicate){ - min_predicate = current_predicate; - min_threshold = current_threshold; - } - if (*pos_it >= *neg_it){ - // compute current threshold - current_threshold = *neg_it; - // go to the next negative value - ++neg_it; - far -= far_decrease; - } else { - // compute current threshold - current_threshold = *pos_it; - // go to the next positive - ++pos_it; - frr += frr_increase; - } - // increase positive and negative as long as they contain the same value - while (neg_it != negatives_.end() && *neg_it == current_threshold) { - // go to next negative - ++neg_it; - far -= far_decrease; - } - while (pos_it != positives_.end() && *pos_it == current_threshold) { - // go to next positive - ++pos_it; - frr += frr_increase; - } - // compute a new threshold based on the center between last and current score, if we are not already at the end of the score lists - if (neg_it != negatives_.end() || pos_it != positives_.end()){ - if (neg_it != negatives_.end() && pos_it != positives_.end()) - current_threshold += std::min(*pos_it, *neg_it); - else if (neg_it != negatives_.end()) - current_threshold += *neg_it; - else - current_threshold += *pos_it; - current_threshold /= 2; - } - } // while - - // now, we have reached the end of one list (usually the negatives) - // so, finally compute predicate for the last time - current_predicate = predicate(far, frr); - if (current_predicate < min_predicate){ - min_predicate = current_predicate; - min_threshold = current_threshold; + template double + minimizingThreshold(const blitz::Array& negatives, + const blitz::Array& positives, T& predicate) { + const size_t N = 100; ///< number of steps in each iteration + double min = std::min(blitz::min(negatives), blitz::min(positives)); + double max = std::max(blitz::max(negatives), blitz::max(positives)); + return recursive_minimization(negatives, positives, predicate, min, + max, N); } - // return the best threshold found - return min_threshold; - } - /** * Calculates the threshold that is, as close as possible, to the * equal-error-rate (EER) given the input data. The EER should be the point @@ -224,7 +212,7 @@ namespace bob { namespace measure { const blitz::Array& positives); /** - * Calculates the equal-error-rate (EER) given the input data, on the ROC + * Calculates the equal-error-rate (EER) given the input data, on the ROC * Convex Hull, as performed in the Bosaris toolkit. * (https://sites.google.com/site/bosaristoolkit/) */ @@ -292,7 +280,7 @@ namespace bob { namespace measure { blitz::Array roc (const blitz::Array& negatives, const blitz::Array& positives, size_t points); - + /** * Calculates the precision-recall curve given a set of positive and negative scores and a * number of desired points. Returns a two-dimensional blitz::Array of @@ -306,8 +294,8 @@ namespace bob { namespace measure { const blitz::Array& positives, size_t points); /** - * Calculates the ROC Convex Hull (ROCCH) given a set of positive and - * negative scores and a number of desired points. Returns a + * Calculates the ROC Convex Hull (ROCCH) given a set of positive and + * negative scores and a number of desired points. Returns a * two-dimensional blitz::Array of doubles that contain the coordinates * of the vertices of the ROC Convex Hull (the first row is for "pmiss" * and the second row is for "pfa"). @@ -320,10 +308,10 @@ namespace bob { namespace measure { /** * Calculates the Equal Error Rate (EER) on the ROC Convex Hull (ROCCH) - * from the 2-row matrices containing the pmiss and pfa vectors + * from the 2-row matrices containing the pmiss and pfa vectors * (which is the output of the bob::measure::rocch()). * Note: pmiss and pfa contain the coordinates of the vertices of the - * ROC Convex Hull. + * ROC Convex Hull. * Reference: Bosaris toolkit * (https://sites.google.com/site/bosaristoolkit/) */ diff --git a/bob/measure/test_error.py b/bob/measure/test_error.py index f45cf2b..4ae8e08 100644 --- a/bob/measure/test_error.py +++ b/bob/measure/test_error.py @@ -211,7 +211,7 @@ def test_plots(): xy = epc(dev_negatives, dev_positives, test_negatives, test_positives, 100) # uncomment the next line to save a reference value - # bob.io.base.save(xy, F('nonsep-epc.hdf5')) + # save('nonsep-epc.hdf5', xy) xyref = bob.io.base.load(F('nonsep-epc.hdf5')) assert numpy.allclose(xy, xyref, atol=1e-15) diff --git a/version.txt b/version.txt index 1a78b34..4b20305 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -2.1.0b0 +2.0.5b0 \ No newline at end of file