From 87ef0342cee2ca4c2dd4c3d2c74fb112a09b9fbc Mon Sep 17 00:00:00 2001 From: "Vincent A. Cicirello" Date: Tue, 15 Feb 2022 15:12:16 -0500 Subject: [PATCH 1/3] doc edits --- .../permutations/distance/KendallTauDistance.java | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/cicirello/permutations/distance/KendallTauDistance.java b/src/main/java/org/cicirello/permutations/distance/KendallTauDistance.java index 1233801c..7034e020 100644 --- a/src/main/java/org/cicirello/permutations/distance/KendallTauDistance.java +++ b/src/main/java/org/cicirello/permutations/distance/KendallTauDistance.java @@ -1,5 +1,5 @@ /* - * Copyright 2014, 2015, 2017-2021 Vincent A. Cicirello, . + * Copyright 2014, 2015, 2017-2022 Vincent A. Cicirello, . * * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). * @@ -24,8 +24,6 @@ import java.util.Arrays; /** - * Kendall Tau Distance: - * *

Kendall Tau distance is sometimes also known as bubble sort distance, as it is * the number of adjacent swaps necessary to transform one permutation into the other.

* @@ -47,9 +45,8 @@ *

Kendall Tau distance originally described in:
* M. G. Kendall, "A new measure of rank correlation," Biometrika, vol. 30, no. 1/2, pp. 81–93, June 1938.

* - * @author Vincent A. Cicirello, https://www.cicirello.org/ - * @version 5.13.2021 - * + * @author Vincent A. Cicirello, + * https://www.cicirello.org/ */ public final class KendallTauDistance implements NormalizedPermutationDistanceMeasurer { From 813ff9293185b6dc30308b1fa511c75833d8e1c3 Mon Sep 17 00:00:00 2001 From: "Vincent A. Cicirello" Date: Tue, 15 Feb 2022 16:09:09 -0500 Subject: [PATCH 2/3] added weighted Kendall tau distance --- .../distance/WeightedKendallTauDistance.java | 156 ++++++++++++++++++ .../distance/PermutationDistanceMaxTests.java | 24 +++ .../PermutationDistanceNormTests.java | 14 ++ .../distance/PermutationDistanceTests.java | 63 +++++++ 4 files changed, 257 insertions(+) create mode 100644 src/main/java/org/cicirello/permutations/distance/WeightedKendallTauDistance.java diff --git a/src/main/java/org/cicirello/permutations/distance/WeightedKendallTauDistance.java b/src/main/java/org/cicirello/permutations/distance/WeightedKendallTauDistance.java new file mode 100644 index 00000000..b4cb62be --- /dev/null +++ b/src/main/java/org/cicirello/permutations/distance/WeightedKendallTauDistance.java @@ -0,0 +1,156 @@ +/* + * JavaPermutationTools: A Java library for computation on permutations and sequences. + * Copyright (C) 2018-2022 Vincent A. Cicirello, . + * + * This file is part of JavaPermutationTools (https://jpt.cicirello.org/). + * + * JavaPermutationTools is free software: you can + * redistribute it and/or modify it under the terms of the GNU + * General Public License as published by the Free Software + * Foundation, either version 3 of the License, or (at your + * option) any later version. + * + * JavaPermutationTools is distributed in the hope + * that it will be useful, but WITHOUT ANY WARRANTY; without even + * the implied warranty of MERCHANTABILITY or FITNESS FOR A + * PARTICULAR PURPOSE. See the GNU General Public License for more + * details. 
+ * + * You should have received a copy of the GNU General Public License + * along with JavaPermutationTools. If not, see <https://www.gnu.org/licenses/>. + */ +package org.cicirello.permutations.distance; + +import org.cicirello.permutations.Permutation; +import java.util.Arrays; + +/** + *

This class implements the weighted Kendall tau distance. In the original + * Kendall tau distance, each inverted pair of elements (i.e., such that element + * x appears someplace before y in Permutation p1, but someplace after y in Permutation p2) + * contributes 1 to the distance. Thus, since there are n(n-1)/2 pairs of elements, + * the maximum Kendall tau distance is n(n-1)/2, where n is the permutation length. + * In this weighted Kendall tau distance, each element x of the permutation has an + * associated weight w(x), and each inverted pair x, y (where x appears sometime + * prior to y in p1, but sometime after y in p2) contributes w(x) * w(y) to the weighted + * Kendall tau distance.

+ * + *

The weighted Kendall tau distance was first described in:
+ * "Failure proximity: a fault localization-based approach" (Liu and Han, SIGSOFT 2006, pages 46-56).

+ * + *

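The runtime of JPT's implementation is O(n lg n + k), where n is the permutation length and k is the number of inverted pairs, which makes it O(n^2) in the worst case (e.g., when p2 is the reverse of p1). + * A modified version of mergesort is used to sum the weighted inversions; the extra k term arises because, during each merge, the weights of the elements remaining in the left half are re-summed each time an element of the right half is merged ahead of them.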

+ * + * @author Vincent A. Cicirello, + * https://www.cicirello.org/ + */ +public final class WeightedKendallTauDistance implements NormalizedPermutationDistanceMeasurerDouble { + + private final double[] weights; + private final double maxDistance; + + /** + * Constructs an instance of the WeightedKendallTauDistance. + * @param weights An array of weights, such that weights[e] is the weight of + * element e. + */ + public WeightedKendallTauDistance(double[] weights) { + this.weights = weights.clone(); + double max = 0; + for (int i = 0; i < weights.length - 1; i++) { + double runningSum = 0; + for (int j = i+1; j < weights.length; j++) { + runningSum += weights[j]; + } + max += weights[i] * runningSum; + } + maxDistance = max; + } + + /** + * Gets the length of permutations supported by this instance of + * WeightedKendallTauDistance, which is equal to the length of the + * array of weights passed to the constructor. + * + * @return The length of supported Permutations. + */ + public int supportedLength() { + return weights.length; + } + + /** + * {@inheritDoc} + * + * @throws IllegalArgumentException if p1.length() is not equal to supportedLength(), + * or if p2.length() is not equal to supportedLength(). + */ + @Override + public double distancef(Permutation p1, Permutation p2) { + if (p1.length() != weights.length || p2.length() != weights.length) { + throw new IllegalArgumentException("p1 and/or p2 not of supported length of this instance"); + } + // use inverse of p1 as a relabeling + int[] invP1 = p1.getInverse(); + + // relabel array copy of p2 and likewise map weights to weights of relabeled copy + int[] arrayP2 = new int[invP1.length]; + double[] w = new double[weights.length]; + for (int i = 0; i < arrayP2.length; i++) { + arrayP2[i] = invP1[p2.get(i)]; + w[arrayP2[i]] = weights[p2.get(i)]; + } + + return countWeightedInversions(arrayP2, w); + } + + /** + * {@inheritDoc} + * + *

This implementation ignores the length parameter since this + * distance is configured for one specific length based upon the weights + * passed during construction.

+ */ + @Override + public double maxf(int length) { + return maxDistance; + } + + private double countWeightedInversions(int[] array, double[] w) { + if (array.length <= 1) return 0; + int m = array.length >> 1; + int[] left = Arrays.copyOfRange(array, 0, m); + int[] right = Arrays.copyOfRange(array, m, array.length); + double weightedCount = countWeightedInversions(left, w) + countWeightedInversions(right, w); + int i = 0; + int j = 0; + int k = 0; + while (i < left.length && j < right.length) { + if (left[i] < right[j]) { + array[k] = left[i]; + i++; + k++; + } else { + // inversions + double leftWeights = 0; + for (int x = i; x < left.length; x++) { + leftWeights += w[left[x]]; + } + weightedCount += w[right[j]] * leftWeights; + array[k] = right[j]; + j++; + k++; + } + } + while (i < left.length) { + array[k] = left[i]; + i++; + k++; + } + while (j < right.length) { + array[k] = right[j]; + j++; + k++; + } + return weightedCount; + } +} diff --git a/src/test/java/org/cicirello/permutations/distance/PermutationDistanceMaxTests.java b/src/test/java/org/cicirello/permutations/distance/PermutationDistanceMaxTests.java index 2a2f58fe..7cf829e1 100644 --- a/src/test/java/org/cicirello/permutations/distance/PermutationDistanceMaxTests.java +++ b/src/test/java/org/cicirello/permutations/distance/PermutationDistanceMaxTests.java @@ -187,6 +187,30 @@ public void testKendallTauDistance() { } } + @Test + public void testWeightedKendallTauDistance() { + for (int n = 0; n <= 7; n++) { + double[] weights = new double[n]; + for (int i = 0; i < n; i++) { + weights[i] = 1; + } + WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights); + double expected = n*(n-1)/2; + assertEquals(expected, d.maxf(n), EPSILON, "Failed on length: " + n); + + for (int i = 0; i < n; i++) { + weights[i] = 2; + } + d = new WeightedKendallTauDistance(weights); + expected *= 4; + assertEquals(expected, d.maxf(n), EPSILON, "Failed on length: " + n); + } + double[] weights = { 5, 10, 2, 0, 
8, 3 }; + WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights); + double expected = 25*3 + 17*8 + 15*2 + 50; + assertEquals(expected, d.maxf(weights.length), EPSILON); + } + @Test public void testReinsertionDistance() { diff --git a/src/test/java/org/cicirello/permutations/distance/PermutationDistanceNormTests.java b/src/test/java/org/cicirello/permutations/distance/PermutationDistanceNormTests.java index 3e3a6743..969ef715 100644 --- a/src/test/java/org/cicirello/permutations/distance/PermutationDistanceNormTests.java +++ b/src/test/java/org/cicirello/permutations/distance/PermutationDistanceNormTests.java @@ -24,6 +24,7 @@ import org.junit.jupiter.api.*; import static org.junit.jupiter.api.Assertions.*; import org.cicirello.permutations.*; +import java.util.SplittableRandom; /** * JUnit tests for the normalizedDistance method of various classes that implement permutation distance metrics. @@ -136,6 +137,19 @@ public void testKendallTauDistance() { } } + @Test + public void testWeightedKendallTauDistance() { + SplittableRandom gen = new SplittableRandom(42); + for (int n = 0; n <= 6; n++) { + double[] weights = new double[n]; + for (int i = 0; i < n; i++) { + weights[i] = 5 + 15*gen.nextDouble(); + } + WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights); + assertEquals(n<=1 ? 
0.0 : 1.0, bruteForceComputeMaxD(d,n), EPSILON, "Failed on length: " + n); + } + } + @Test public void testReinsertionDistance() { diff --git a/src/test/java/org/cicirello/permutations/distance/PermutationDistanceTests.java b/src/test/java/org/cicirello/permutations/distance/PermutationDistanceTests.java index 0f98cec7..d96087c2 100644 --- a/src/test/java/org/cicirello/permutations/distance/PermutationDistanceTests.java +++ b/src/test/java/org/cicirello/permutations/distance/PermutationDistanceTests.java @@ -603,6 +603,69 @@ public void testInterchangeDistance() { ); } + @Test + public void testWeightedKendallTauDistance_WeightsAllOneCase() { + for (int n = 2; n <= 10; n++) { + double[] weights = new double[n]; + for (int i = 0; i < n; i++) { + weights[i] = 1; + } + WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights); + assertEquals(n, d.supportedLength()); + Permutation p = new Permutation(n); + Permutation copy = new Permutation(p); + assertEquals(0.0, d.distancef(p, copy), 1E-10); + //maximal distance is permutation reversed + copy.reverse(); + double expected = n*(n-1)/2; + assertEquals(expected, d.distancef(p,copy)); + copy.reverse(); + copy.swap(0,n-1); + expected = 2*n-3; + assertEquals(expected, d.distancef(p,copy), 1E-10); + } + final WeightedKendallTauDistance d = new WeightedKendallTauDistance(new double[] {1, 1, 1, 1, 1, 1}); + Permutation p = new Permutation(6); + for (Permutation q : p) { + assertEquals(naiveKendalTau(p,q), d.distancef(p,q), 1E-10); + } + + IllegalArgumentException thrown = assertThrows( + IllegalArgumentException.class, + () -> d.distancef(new Permutation(5), new Permutation(6)) + ); + assertThrows( + IllegalArgumentException.class, + () -> d.distancef(new Permutation(6), new Permutation(5)) + ); + } + + @Test + public void testWeightedKendallTauDistance() { + double[] weights = {8, 2, 10, 20, 5, 1}; + int[] p1 = { 5, 2, 0, 3, 1, 4}; + WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights); + 
assertEquals(0.0, d.distancef(new Permutation(p1), new Permutation(p1)), 1E-10); + int[] p2 = { 4, 2, 0, 3, 1, 5 }; + double expected = 41*5 + 40; + assertEquals(expected, d.distancef(new Permutation(p1), new Permutation(p2)), 1E-10); + int[] p3 = { 5, 2, 0, 1, 3, 4}; + expected = 40; + assertEquals(expected, d.distancef(new Permutation(p1), new Permutation(p3)), 1E-10); + } + + @Test + public void testWeightedKendallTauDistanceReversed() { + double[] weights = {8, 2, 10, 20, 5, 1}; + WeightedKendallTauDistance d = new WeightedKendallTauDistance(weights); + int[] perm = { 5, 2, 0, 3, 1, 4}; + Permutation p1 = new Permutation(perm); + Permutation p2 = new Permutation(p1); + p2.reverse(); + double expected = 45.0 + 40.0*5 + 20*20 + 10*10 + 8*2; + assertEquals(expected, d.distancef(new Permutation(p1), new Permutation(p2)), 1E-10); + } + @Test public void testKendallTauDistance() { KendallTauDistance d = new KendallTauDistance(); From a1f6cb86bf8090374ea1253ff8abe5a85e0cf5d5 Mon Sep 17 00:00:00 2001 From: "Vincent A. Cicirello" Date: Tue, 15 Feb 2022 16:11:37 -0500 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf41e3fd..8d7428dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,9 +9,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added ### Changed -* Bumped dependency rho-mu to 1.2.0 -* Bumped dependency org.cicirello.core to 1.1.0 -* Migrated test cases to JUnit 5 (specifically JUnit Jupiter 5.8.2). 
### Deprecated @@ -19,6 +16,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +### CI/CD + +### Other + + +## [3.1.0] - 2022-02-15 + +### Added +* WeightedKendallTauDistance: an implementation of a weighted version of Kendall tau distance + +### Changed +* Bumped dependency rho-mu to 1.2.0 +* Bumped dependency org.cicirello.core to 1.1.0 +* Migrated test cases to JUnit 5 (specifically JUnit Jupiter 5.8.2). + ### CI/CD * Automated commenting of test coverage percentages on pull requests. * Revised documentation workflow to deploy to API doc updates to website only