-
Notifications
You must be signed in to change notification settings - Fork 5
/
LetterPairSimilarity.java
71 lines (63 loc) · 2.06 KB
/
LetterPairSimilarity.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package org.genericsystem.cv.utils;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
/**
 * Case-insensitive string similarity based on adjacent letter pairs.
 *
 * <p>Each input is split into whitespace-separated words, every word is broken into its
 * overlapping two-character substrings, and the score is
 * {@code 2 * |common pairs| / (|pairs1| + |pairs2|)} (the Dice coefficient over the two
 * multisets of pairs). The score lies in {@code [0, 1]}.
 */
public class LetterPairSimilarity {

    /** Word separator: one or more whitespace characters. Compiled once and reused. */
    private static final Pattern SPACE_PATTERN = Pattern.compile("\\s+");

    /**
     * Small demo: prints the similarity of "healed" against a fixed list of sample words.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String string = "healed";
        String[] strings = new String[] { "healed", "sealed", "healthy", "heard", "herded", "help", "sold" };
        for (String s : strings) {
            double compare = LetterPairSimilarity.compareStrings(string, s);
            System.out.println(String.format("Similarity = %.3f for %s and %s", compare, string, s));
        }
    }

    /**
     * Computes the letter-pair similarity of two strings, ignoring case.
     *
     * @param string1 first string, must not be {@code null}
     * @param string2 second string, must not be {@code null}
     * @return {@code 1.0} when the strings are equal ignoring case; {@code 0.0} when exactly one
     *         of them is empty or when neither contains a word of at least two characters;
     *         otherwise {@code 2 * common / (pairs1 + pairs2)}
     * @throws IllegalArgumentException if either argument is {@code null}
     */
    public static double compareStrings(String string1, String string2) {
        if (null == string1 || null == string2) {
            throw new IllegalArgumentException("LetterPairSimilarity requires two not null strings");
        }

        // Locale.ROOT keeps the result independent of the platform's default locale.
        String lower1 = string1.toLowerCase(Locale.ROOT);
        String lower2 = string2.toLowerCase(Locale.ROOT);

        // Compare after lower-casing so that inputs differing only by case always score 1,
        // including inputs too short to produce any letter pair.
        if (lower1.equals(lower2)) {
            return 1d;
        }
        if (lower1.isEmpty() || lower2.isEmpty()) {
            return 0d;
        }

        List<String> pairs1 = wordLetterPairs(lower1);
        List<String> pairs2 = wordLetterPairs(lower2);

        int totalPairs = pairs1.size() + pairs2.size();
        if (totalPairs == 0) {
            // Neither side produced a pair (e.g. "a" vs "b"): avoid 0/0 = NaN.
            return 0d;
        }

        int intersection = 0;
        for (String pair : pairs1) {
            // Removing the matched pair ensures each occurrence in pairs2 is counted at most once.
            if (pairs2.remove(pair)) {
                intersection++;
            }
        }
        return (2d * intersection) / totalPairs;
    }

    /**
     * Collects the letter pairs of every whitespace-separated word in the given string.
     *
     * @param string the text to split into words
     * @return a new mutable list holding the pairs of all words, in order of appearance
     */
    private static List<String> wordLetterPairs(String string) {
        List<String> allPairs = new ArrayList<>();
        for (String word : SPACE_PATTERN.split(string)) {
            for (String pair : letterPairs(word)) {
                allPairs.add(pair);
            }
        }
        return allPairs;
    }

    /**
     * Returns the overlapping two-character substrings of a single word.
     *
     * @param string the word to break up
     * @return an array of {@code length - 1} pairs; empty when the word has fewer than two characters
     */
    private static String[] letterPairs(String string) {
        if (string.length() < 2) {
            return new String[0];
        }
        int numPairs = string.length() - 1;
        String[] pairs = new String[numPairs];
        for (int i = 0; i < numPairs; ++i) {
            pairs[i] = string.substring(i, i + 2);
        }
        return pairs;
    }
}