Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/jmkeil/commons-text
Browse files: browse the repository at this point in the history
  • Loading branch information
chtompki committed Aug 8, 2018
2 parents 97d6064 + 5d14854 commit 6a79e11
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,17 +85,17 @@ public Double apply(final CharSequence left, final CharSequence right) {
if (m == 0) {
return 0D;
}
final double j = ((m / left.length() + m / right.length() + (m - mtp[1]) / m)) / 3;
final double jw = j < 0.7D ? j : j + Math.min(defaultScalingFactor, 1D / mtp[3]) * mtp[2] * (1D - j);
final double j = ((m / left.length() + m / right.length() + (m - (double) mtp[1] / 2) / m)) / 3;
final double jw = j < 0.7D ? j : j + defaultScalingFactor * mtp[2] * (1D - j);
return jw;
}

/**
* This method returns the Jaro-Winkler string matches, transpositions, prefix, max array.
* This method returns the Jaro-Winkler string matches, half transpositions, prefix array.
*
* @param first the first string to be matched
* @param second the second string to be matched
* @return mtp array containing: matches, transpositions, prefix, and max length
* @return mtp array containing: matches, half transpositions, and prefix
*/
protected static int[] matches(final CharSequence first, final CharSequence second) {
CharSequence max, min;
Expand Down Expand Up @@ -136,21 +136,21 @@ protected static int[] matches(final CharSequence first, final CharSequence seco
si++;
}
}
int transpositions = 0;
int halfTranspositions = 0;
for (int mi = 0; mi < ms1.length; mi++) {
if (ms1[mi] != ms2[mi]) {
transpositions++;
halfTranspositions++;
}
}
int prefix = 0;
for (int mi = 0; mi < min.length(); mi++) {
for (int mi = 0; mi < Math.min(4, min.length()); mi++) {
if (first.charAt(mi) == second.charAt(mi)) {
prefix++;
} else {
break;
}
}
return new int[] {matches, transpositions / 2, prefix, max.length()};
return new int[] {matches, halfTranspositions, prefix};
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,14 @@ public void testGetJaroWinklerDistance_StringString() {
assertEquals(0.92499d, distance.apply("frog", "fog"), 0.00001d);
assertEquals(0.0d, distance.apply("fly", "ant"), 0.00000000000000000001d);
assertEquals(0.44166d, distance.apply("elephant", "hippo"), 0.00001d);
assertEquals(0.92740d, distance.apply("ABC Corporation", "ABC Corp"), 0.00001d);
assertEquals(0.94580d, distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."), 0.00001d);
assertEquals(0.921458d,
assertEquals(0.90666d, distance.apply("ABC Corporation", "ABC Corp"), 0.00001d);
assertEquals(0.95251d, distance.apply("D N H Enterprises Inc", "D & H Enterprises, Inc."), 0.00001d);
assertEquals(0.942d,
distance.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness"), 0.00001d);
assertEquals(0.882329d, distance.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.00001d);
assertEquals(0.996598d, distance.apply("/opt/software1", "/opt/software2"), 0.00001d);
assertEquals(0.898018d, distance.apply("PENNSYLVANIA", "PENNCISYLVNIA"), 0.00001d);
assertEquals(0.971428d, distance.apply("/opt/software1", "/opt/software2"), 0.00001d);
assertEquals(0.941666d, distance.apply("aaabcd", "aaacdb"), 0.00001d);
assertEquals(0.911111d, distance.apply("John Horn", "John Hopkins"), 0.00001d);
}

@Test
Expand Down

0 comments on commit 6a79e11

Please sign in to comment.