Skip to content

Commit

Permalink
gs-cv: added a comparison based on Levenshtein distance in OCRPlasty
Browse files Browse the repository at this point in the history
  • Loading branch information
plassalas committed Oct 12, 2017
1 parent 916e0ab commit 7b3aec6
Showing 1 changed file with 39 additions and 3 deletions.
42 changes: 39 additions & 3 deletions gs-cv/src/main/java/org/genericsystem/cv/utils/OCRPlasty.java
Expand Up @@ -38,17 +38,19 @@ public static List<String> getRansacInliers(List<String> labels) {
return Collections.emptyList();

// int maxLength = getMaxLcsLength(trimmed);
double maxSimilarity = getMaxSimilarity(trimmed);
// double maxSimilarity = getMaxSimilarity(trimmed);
double maxLevenshtein = getMaxLevenshtein(trimmed);

Map<Integer, String> bestFit = new HashMap<>();
for (int i = 1, maxAttempts = 10; bestFit.size() <= 3 && i <= maxAttempts; ++i) {
int t = 1;
// Ransac<String> ransac = new Ransac<>(trimmed, getModelProviderMaxLcs(maxLength), 3, 50 * i, t, trimmed.size() / 2);
Ransac<String> ransac = new Ransac<>(trimmed, getModelProviderSimilarity(maxSimilarity), 3, 50 * i, t, trimmed.size() / 2);
// Ransac<String> ransac = new Ransac<>(trimmed, getModelProviderSimilarity(maxSimilarity), 3, 50 * i, t, trimmed.size() / 2);
Ransac<String> ransac = new Ransac<>(trimmed, getModelProviderLevenshtein(maxLevenshtein), 3, 50 * i, t, trimmed.size() / 2);
try {
ransac.compute();
bestFit = ransac.getBestDataSet();
// bestFit.entrySet().forEach(entry -> System.out.println("key: " + entry.getKey() + " | value: " + entry.getValue()));
bestFit.entrySet().forEach(entry -> System.out.println("key: " + entry.getKey() + " | value: " + entry.getValue()));
} catch (Exception e) {
// Can't get a good model. Increase the error margin
t += 1;
Expand All @@ -72,6 +74,17 @@ private static double getMaxSimilarity(List<String> labels) {
return max;
}

/**
 * Computes the reference Levenshtein score of a list of labels.
 * <p>
 * Despite the name, the returned value is a sum, not a maximum: for every ordered pair of labels (each label is also
 * paired with itself), the Levenshtein distance is divided by the combined length of the two strings, and all those
 * normalized distances are added together.
 *
 * @param labels the strings to compare with each other
 * @return the sum of the length-normalized Levenshtein distances over all ordered pairs; {@code 0} for an empty list
 */
private static double getMaxLevenshtein(List<String> labels) {
	double max = 0d;
	for (String base : labels) {
		// The full list is walked again (instead of starting at the current index) so that the pairs match what
		// a model provider iterating over the whole collection would see.
		for (String other : labels) {
			int combinedLength = base.length() + other.length();
			if (combinedLength == 0) {
				// Two empty strings: dividing by a zero length would produce NaN and poison the whole sum.
				continue;
			}
			max += Levenshtein.distance(base, other) / (double) combinedLength;
		}
	}
	return max;
}

private static Function<Collection<String>, Model<String>> getModelProviderMaxLcs(int maxLength) {
return datas -> {
Iterator<String> it = datas.iterator();
Expand Down Expand Up @@ -123,6 +136,29 @@ public Object[] getParams() {
};
}

/**
 * Builds the model provider handed to {@code Ransac}: for a given sample of strings, it creates a model whose error
 * for a candidate string is the absolute difference between the summed Levenshtein distances from the candidate to
 * the sample and the reference value {@code maxLevenshtein}.
 * <p>
 * NOTE(review): the distances summed here are raw, whereas {@code getMaxLevenshtein} divides each distance by the
 * combined string length before summing — confirm that comparing the two quantities is intended.
 *
 * @param maxLevenshtein the reference value the summed distance is compared against
 * @return a function creating a model from a sample of strings
 */
private static Function<Collection<String>, Model<String>> getModelProviderLevenshtein(double maxLevenshtein) { // TODO
	return datas -> {
		return new Model<String>() {
			// Summed distance computed by the most recent computeError call; exposed through getParams.
			private double max = 0d;

			@Override
			public double computeError(String data) {
				// Accumulate in a local so that every call starts from zero: adding directly into the field made
				// the error of one candidate depend on all candidates evaluated before it.
				double total = 0d;
				for (String s : datas) {
					// Reference comparison: only the very same instance is skipped.
					if (s != data) {
						total += Levenshtein.distance(data, s);
					}
				}
				max = total;
				return Math.abs(total - maxLevenshtein);
			}

			@Override
			public Object[] getParams() {
				return new Object[] { max };
			}
		};
	};
}

public static String ocrPlasty(List<String> labels) {
if (labels == null || labels.isEmpty())
throw new IllegalStateException("Attempt to compute the longestCommonSubsequence on an empty list");
Expand Down

0 comments on commit 7b3aec6

Please sign in to comment.