-
Notifications
You must be signed in to change notification settings - Fork 5
/
FillNewModelWithData.java
237 lines (208 loc) · 9.79 KB
/
FillNewModelWithData.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
package org.genericsystem.cv.classifier;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.genericsystem.api.core.exceptions.RollbackException;
import org.genericsystem.common.Root;
import org.genericsystem.cv.Img;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ConsolidatedType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ConsolidatedType.ConsolidatedInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.DocClassType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.DocClassType.DocClassInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.DocType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.DocType.DocInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgDocRel;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgDocRel.ImgDocLink;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgPathType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgPathType.ImgPathInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgTimestampType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgTimestampType.ImgTimestampInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ImgType.ImgInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.LayoutType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.LayoutType.LayoutInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ZoneNumType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ZoneNumType.ZoneNumInstance;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ZoneType;
import org.genericsystem.cv.classifier.newmodel.SimpleModel.ZoneType.ZoneInstance;
import org.genericsystem.cv.comparator.FillModelWithData;
import org.genericsystem.cv.comparator.ImgFilterFunction;
import org.genericsystem.cv.comparator.ImgFunction;
import org.genericsystem.cv.utils.ClassifierUsingFields;
import org.genericsystem.cv.utils.Deskewer;
import org.genericsystem.cv.utils.Deskewer.METHOD;
import org.genericsystem.cv.utils.ModelTools;
import org.genericsystem.cv.utils.NativeLibraryLoader;
import org.genericsystem.kernel.Engine;
import org.opencv.core.Rect;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.vertx.core.json.JsonObject;
/**
 * Static helpers that detect the fields of a deskewed document image, run the OCR on every field through a set of
 * image filters, and persist the consolidated result in the Generic System "new model".
 * <p>
 * The JSON documents exchanged between the steps use the keys exposed as public constants of this class.
 */
public class FillNewModelWithData {

    static {
        // Must run before any Img is created (presumably loads the OpenCV native library -- confirm in NativeLibraryLoader).
        NativeLibraryLoader.load();
    }

    /** JSON key: path of the document, relative to the base folder when it can be relativized. */
    public static final String DOC_PATH = "docPath";
    /** JSON key: plain file name of the document. */
    public static final String FILENAME = "filename";
    /** JSON key: file name generated by {@code ModelTools.generateFileName}. */
    public static final String ENCODED_FILENAME = "encodedFilename";
    /** JSON key: timestamp taken when the document was processed. */
    public static final String DOC_TIMESTAMP = "docTimestamp";
    /** JSON key: object holding one entry per detected zone, keyed by the field uid. */
    public static final String ZONES = "zones";
    /** JSON key (inside a zone): number of the field. */
    public static final String FIELD_NUM = "fieldNum";
    /** JSON key (inside a zone): rectangle of the field, serialized as a JSON object. */
    public static final String RECT = "rectangle";
    /** JSON key (inside a zone): consolidated OCR text, empty when no text was retained. */
    public static final String CONSOLIDATED = "consolidated";

    private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    private static final String gsPath = System.getenv("HOME") + "/genericsystem/gs-cv-newmodel";
    private static final String BASE_PATH = System.getenv("HOME") + "/genericsystem/gs-ir-files/";

    /**
     * Demo entry point: deskews a sample image, detects its fields, runs the OCR and stores the result in the model.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Path filePath = Paths.get(BASE_PATH, "converted-png", "image-1.png");
        Path deskewedPath = Deskewer.deskewAndSave(filePath);
        JsonObject jsonFields = detectFields(deskewedPath);
        System.out.println(jsonFields.encodePrettily());
        JsonObject data = processFile(deskewedPath, jsonFields);
        System.out.println(data.encodePrettily());
        Root engine = getEngine(gsPath);
        try {
            saveOcrDataInModel(engine, data);
        } finally {
            // Always release the engine, even when saving the data fails.
            engine.close();
        }
    }

    /**
     * Creates an engine configured with every type and instance class of the new model.
     *
     * @param gsPath the persistence path handed to the {@link Engine} constructor
     * @return a new engine; the caller is responsible for closing it
     */
    public static Root getEngine(String gsPath) {
        return new Engine(gsPath, DocClassType.class, DocClassInstance.class, LayoutType.class, LayoutInstance.class, ImgDocRel.class, ImgDocLink.class, DocType.class, DocInstance.class, ImgType.class, ImgInstance.class, ZoneType.class, ZoneInstance.class,
                ZoneNumType.class, ZoneNumInstance.class, ConsolidatedType.class, ConsolidatedInstance.class, ImgPathType.class, ImgPathInstance.class, ImgTimestampType.class, ImgTimestampInstance.class);
    }

    /**
     * Registers an image in the model (name, path and timestamp) and copies the file to the resources folder.
     * <p>
     * A failure to copy the file is logged but does not change the return value: the image is still considered
     * registered in the model.
     *
     * @param engine          the engine used to store the data
     * @param imgPath         the path of the image to register
     * @param resourcesFolder the folder the image is copied to, under its generated file name
     * @return {@code true} when the image was stored in the model, {@code false} when the instance could not be created
     */
    public static boolean registerNewFile(Root engine, Path imgPath, Path resourcesFolder) {
        logger.info("Adding a new image ({}) ", imgPath.getFileName());
        String filenameExt = ModelTools.generateFileName(imgPath);
        ImgType imgType = engine.find(ImgType.class);
        ImgInstance imgInstance = imgType.setImg(filenameExt);
        engine.getCurrentCache().flush();
        if (null == imgInstance) {
            logger.error("An error has occured while saving file {}", filenameExt);
            return false;
        }
        // Same fallback as processFile: a path that cannot be relativized must not abort a half-done registration.
        imgInstance.setImgPath(relativizeToBasePath(imgPath));
        imgInstance.setImgTimestamp(ModelTools.getCurrentDate());
        engine.getCurrentCache().flush();
        try {
            Files.copy(imgPath, resourcesFolder.resolve(filenameExt), StandardCopyOption.REPLACE_EXISTING);
        } catch (IOException e) {
            logger.error(String.format("An error has occured while copying image %s to resources folder", filenameExt), e);
        }
        return true;
    }

    /**
     * Detects the rectangles of the fields on an image and returns them as JSON.
     *
     * @param imgPath the path of the (deskewed) image
     * @return the JSON representation of the detected fields
     * @throws IllegalStateException when anything goes wrong while loading the image or detecting the fields
     */
    public static JsonObject detectFields(Path imgPath) {
        try (Img deskewed = new Img(imgPath.toString())) {
            List<Rect> rects = ClassifierUsingFields.detectRects(deskewed);
            return DocFields.of(rects).toJsonObject();
        } catch (Exception e) {
            throw new IllegalStateException("An error has occured while detecting the fields on file " + imgPath.toString(), e);
        }
    }

    /**
     * Runs the OCR on every field of an image, using each available image filter, and builds the JSON document
     * expected by {@link #saveOcrDataInModel(Root, JsonObject)}.
     *
     * @param imgPath    the absolute path of the (deskewed) image
     * @param jsonFields the fields of the document, as returned by {@link #detectFields(Path)}
     * @return a JSON object holding the file name, encoded file name, timestamp, document path and the zones
     * @throws IllegalArgumentException when {@code imgPath} is not absolute
     */
    public static JsonObject processFile(Path imgPath, JsonObject jsonFields) {
        if (!imgPath.isAbsolute())
            throw new IllegalArgumentException("The provided path must be absolute. Got instead: " + imgPath.toString());

        // Create a JsonObject for the answer
        JsonObject jsonObject = new JsonObject();
        jsonObject.put(FILENAME, imgPath.getFileName().toString());
        jsonObject.put(ENCODED_FILENAME, ModelTools.generateFileName(imgPath));
        jsonObject.put(DOC_TIMESTAMP, ModelTools.getCurrentDate());
        jsonObject.put(DOC_PATH, relativizeToBasePath(imgPath));

        // Get the doc fields (before any image is allocated, so a parsing failure cannot leak anything)
        DocFields fields = DocFields.of(jsonFields);

        Img deskewed = new Img(imgPath.toString());
        final List<ImgFilterFunction> imgFilterFunctions = FillModelWithData.getFilterFunctions();
        Map<String, Img> imgs = new ConcurrentHashMap<>(imgFilterFunctions.size() + 1);
        try {
            applyFilters(deskewed, imgFilterFunctions, imgPath, imgs);
            // Store the ocr in the JsonObject
            jsonObject.put(ZONES, ocrFields(fields, imgs));
        } finally {
            // Close the resources used by OpenCV, whatever happened above. The order (source image first,
            // then the filtered images) is the same as on the regular path before this guard existed.
            try {
                deskewed.close();
            } finally {
                imgs.values().forEach(Img::close);
            }
        }
        return jsonObject;
    }

    /**
     * Stores the OCR data of a document in the model and flushes the cache.
     *
     * @param engine the engine used to store the data
     * @param data   the JSON document built by {@link #processFile(Path, JsonObject)}
     * @throws IllegalArgumentException when {@code data} has no encoded file name or no zones
     * @throws IllegalStateException    when the image instance could not be created
     */
    public static void saveOcrDataInModel(Root engine, JsonObject data) {
        // Parse the data
        String docPath = data.getString(DOC_PATH);
        String filenameExt = data.getString(ENCODED_FILENAME);
        Long timestamp = data.getLong(DOC_TIMESTAMP);
        JsonObject zonesResults = data.getJsonObject(ZONES);

        // Fail fast, before the model is touched, instead of hitting a NullPointerException half-way through
        if (null == filenameExt)
            throw new IllegalArgumentException("The provided data does not contain the key '" + ENCODED_FILENAME + "'");
        if (null == zonesResults)
            throw new IllegalArgumentException("The provided data does not contain the key '" + ZONES + "'");

        // Get the generics
        ImgType imgType = engine.find(ImgType.class);

        // Set the doc instance and some attributes
        ImgInstance imgInstance = imgType.setImg(filenameExt);
        if (null == imgInstance)
            throw new IllegalStateException("Unable to create or retrieve the image instance for file " + filenameExt);
        try {
            imgInstance.setImgPath(docPath);
            imgInstance.setImgTimestamp(timestamp);
        } catch (RollbackException e) {
            // Deliberately not fatal: the attributes were set by a previous run, the zones are still saved
            logger.debug("Filename or timestamp have already been set. Resuming task...", e);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        // Save the results for each field
        zonesResults.forEach(entry -> {
            logger.info("Current zone: {}", entry.getKey());
            JsonObject field = (JsonObject) entry.getValue();
            String ocr = field.getString(CONSOLIDATED);
            JsonObject rect = field.getJsonObject(RECT);
            ZoneInstance zoneInstance = imgInstance.setZone(rect.encode());
            zoneInstance.setConsolidated(ocr);
            zoneInstance.setZoneNum(field.getInteger(FIELD_NUM));
        });

        // Flush the cache
        engine.getCurrentCache().flush();
    }

    /** Returns {@code imgPath} relative to {@code BASE_PATH}, or {@code imgPath} itself when it cannot be relativized. */
    private static String relativizeToBasePath(Path imgPath) {
        try {
            return Paths.get(BASE_PATH).relativize(imgPath).toString();
        } catch (IllegalArgumentException e) {
            logger.debug("Unable to find a common path between BASE_PATH and imgPath. Using the provided path instead.", e);
            return imgPath.toString();
        }
    }

    /** Applies every filter to {@code deskewed} and adds each produced image to {@code imgs}, keyed by the filter name. */
    private static void applyFilters(Img deskewed, List<ImgFilterFunction> imgFilterFunctions, Path imgPath, Map<String, Img> imgs) {
        for (ImgFilterFunction entry : imgFilterFunctions) {
            String filtername = entry.getName();
            ImgFunction function = entry.getLambda();
            logger.info("Applying algorithm {}...", filtername);
            Img img;
            long start = System.nanoTime();
            if ("original".equals(filtername) || "reality".equals(filtername)) {
                // These two names do not go through their lambda: the image is built from the deskewed source
                img = new Img(deskewed.getSrc(), true);
            } else {
                img = function.apply(deskewed);
            }
            long stop = System.nanoTime();
            logger.info("({} ms)", (stop - start) / 1_000_000);
            if (null != img) {
                imgs.put(filtername, img);
            } else {
                logger.error("An error as occured for image {} and filter {}", imgPath.getFileName(), filtername);
            }
        }
    }

    /** Runs the OCR of every field on every eligible filtered image and returns one JSON object per field, keyed by uid. */
    private static Map<String, JsonObject> ocrFields(DocFields fields, Map<String, Img> imgs) {
        Map<String, JsonObject> result = new ConcurrentHashMap<>(fields.size() + 1);
        fields.forEach(field -> {
            logger.info("Field {}", field.getNum());
            for (Map.Entry<String, Img> entry : imgs.entrySet()) {
                String filtername = entry.getKey();
                // "reality" and "best" are skipped: no OCR is run on them
                if (!("reality".equals(filtername) || "best".equals(filtername))) {
                    // Do the ocr; the field stores the value itself
                    field.ocr(entry.getValue());
                }
            }
            // Let the field choose the best text among the OCR results
            field.consolidateOcr();
            // Store the field data in a json object
            JsonObject json = new JsonObject();
            json.put(FIELD_NUM, field.getNum());
            json.put(CONSOLIDATED, field.getConsolidated().orElse(""));
            json.put(RECT, JsonObject.mapFrom(field.getRect()));
            // Add this to the result
            result.put(field.getUid(), json);
        });
        return result;
    }
}