-
Notifications
You must be signed in to change notification settings - Fork 5
/
ComputeBestTextPerZone.java
127 lines (106 loc) · 4.9 KB
/
ComputeBestTextPerZone.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package org.genericsystem.cv.comparator;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.genericsystem.common.Generic;
import org.genericsystem.common.Root;
import org.genericsystem.cv.model.Doc;
import org.genericsystem.cv.model.Doc.DocInstance;
import org.genericsystem.cv.model.DocClass;
import org.genericsystem.cv.model.ImgFilter;
import org.genericsystem.cv.model.ImgFilter.ImgFilterInstance;
import org.genericsystem.cv.model.MeanLevenshtein;
import org.genericsystem.cv.model.Score;
import org.genericsystem.cv.model.Score.ScoreInstance;
import org.genericsystem.cv.model.ZoneGeneric;
import org.genericsystem.cv.model.ZoneGeneric.ZoneInstance;
import org.genericsystem.cv.model.ZoneText;
import org.genericsystem.cv.model.ZoneText.ZoneTextInstance;
import org.genericsystem.kernel.Engine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Stores, for every document and zone of a document class, a zone text
 * attached to the "best" image filter.
 * <p>
 * When a zone text exists for the "reality" filter (treated here as the
 * supervised value), it is copied as-is. Otherwise the texts produced by all
 * the other image filters are put to a vote: each distinct text is weighted by
 * the sum of the scores of the filters that produced it, and the text with the
 * highest weight is stored.
 */
public class ComputeBestTextPerZone {

    private static final String gsPath = System.getenv("HOME") + "/genericsystem/gs-cv_model3/";

    // The logger must be named after this class so that its messages are
    // attributed (and can be filtered) correctly.
    private static final Logger log = LoggerFactory.getLogger(ComputeBestTextPerZone.class);

    /**
     * Opens the engine stored under {@code gsPath}, runs the computation for
     * the default document type and closes the engine.
     *
     * @param mainArgs unused
     */
    public static void main(String[] mainArgs) {
        final Engine engine = new Engine(gsPath, Doc.class, ImgFilter.class, ZoneGeneric.class, ZoneText.class,
                Score.class, MeanLevenshtein.class);
        try {
            engine.newCache().start();
            compute(engine);
        } finally {
            // Close the engine even when the computation fails.
            engine.close();
        }
    }

    /**
     * Runs {@link #compute(Root, String)} for the "id-fr-front" document type.
     *
     * @param engine the engine holding the model
     */
    public static void compute(Root engine) {
        final String docType = "id-fr-front";
        compute(engine, docType);
    }

    /**
     * Computes and stores the "best" text of every zone of every document of
     * the given document class. The current cache is flushed after each zone.
     *
     * @param engine  the engine holding the model
     * @param docType the name of the document class to process
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    public static void compute(Root engine, String docType) {
        Generic currentDocClass = engine.find(DocClass.class).getInstance(docType);
        ImgFilter imgFilter = engine.find(ImgFilter.class);
        ZoneText zoneText = engine.find(ZoneText.class);
        Score score = engine.find(Score.class);

        List<DocInstance> docInstances = (List) currentDocClass.getHolders(engine.find(Doc.class)).toList();
        List<ZoneInstance> zoneInstances = (List) currentDocClass.getHolders(engine.find(ZoneGeneric.class)).toList();
        // Every filter except "reality" takes part in the election.
        List<ImgFilterInstance> imgFilterInstances = (List) imgFilter.getInstances()
                .filter(f -> !"reality".equals(f.getValue())).toList();

        ImgFilterInstance realityInstance = imgFilter.getImgFilter("reality");
        ImgFilterInstance bestInstance = imgFilter.setImgFilter("best");

        for (DocInstance docInstance : docInstances) {
            log.debug("Processing doc: {}", docInstance.getValue());
            for (ZoneInstance zoneInstance : zoneInstances) {
                log.debug("Zone n°{}", zoneInstance.getValue());
                ZoneTextInstance realTextInstance = zoneText.getZoneText(docInstance, zoneInstance, realityInstance);
                if (realTextInstance == null) {
                    // Not supervised: elect the best text among the OCR results.
                    Map<String, List<String>> ocrResults = collectOcrResults(zoneText, docInstance, zoneInstance,
                            imgFilterInstances);
                    String bestText = electBestText(ocrResults, zoneInstance, imgFilter, score);
                    if (bestText == null) {
                        // No filter produced a text for this zone: there is
                        // nothing to elect, so skip the zone instead of
                        // aborting the whole run.
                        log.error("No OCR text available for {} => zone n°{}, best text not set",
                                docInstance.getValue(), zoneInstance.getValue());
                    } else {
                        zoneText.setZoneText(bestText, docInstance, zoneInstance, bestInstance);
                    }
                } else {
                    // Supervised: the supervised text is the best text.
                    zoneText.setZoneText(realTextInstance.getValue().toString(), docInstance, zoneInstance,
                            bestInstance);
                }
                engine.getCurrentCache().flush();
            }
        }
    }

    /**
     * Groups the names of the image filters by the text they produced for the
     * given document and zone. Filters with no stored text are logged and
     * left out.
     *
     * @return a map from each distinct OCR text to the names of the filters
     *         that produced it; empty when no filter produced a text
     */
    private static Map<String, List<String>> collectOcrResults(ZoneText zoneText, DocInstance docInstance,
            ZoneInstance zoneInstance, List<ImgFilterInstance> imgFilterInstances) {
        Map<String, List<String>> ocrResults = new ConcurrentHashMap<>();
        for (ImgFilterInstance imgFilterInstance : imgFilterInstances) {
            ZoneTextInstance zti = zoneText.getZoneText(docInstance, zoneInstance, imgFilterInstance);
            if (zti == null) {
                // TODO case where zti doesn't exist == filter has not been
                // applied
                log.error("No text found for {} => zone n°{}, {}", docInstance.getValue(),
                        zoneInstance.getValue(), imgFilterInstance.getValue());
            } else {
                ocrResults.computeIfAbsent(zti.getValue().toString(), text -> new ArrayList<>())
                        .add(imgFilterInstance.getValue().toString());
            }
        }
        return ocrResults;
    }

    /**
     * Weights each distinct OCR text by the sum of the scores, for the given
     * zone, of the filters that produced it, and returns the text with the
     * highest weight. A filter without a score is logged and contributes
     * nothing to the weight.
     *
     * @return the elected text, or {@code null} when {@code ocrResults} is
     *         empty
     */
    private static String electBestText(Map<String, List<String>> ocrResults, ZoneInstance zoneInstance,
            ImgFilter imgFilter, Score score) {
        Map<String, Float> ocrElection = new ConcurrentHashMap<>();
        for (Map.Entry<String, List<String>> entry : ocrResults.entrySet()) {
            float ocrWeight = 0f;
            for (String filter : entry.getValue()) {
                ScoreInstance scoreInstance = score.getScore(zoneInstance, imgFilter.getImgFilter(filter));
                if (scoreInstance == null) {
                    log.error("No score found for zone n°{} and filter {}", zoneInstance.getValue(),
                            filter);
                } else {
                    ocrWeight += (Float) scoreInstance.getValue();
                }
            }
            ocrElection.put(entry.getKey(), ocrWeight);
        }
        return ocrElection.entrySet().stream().max(Map.Entry.comparingByValue()).map(Map.Entry::getKey)
                .orElse(null);
    }
}