-
Notifications
You must be signed in to change notification settings - Fork 5
/
GetDataInModel.java
141 lines (126 loc) · 5.4 KB
/
GetDataInModel.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package org.genericsystem.cv.comparator;
import java.io.File;
import java.util.Arrays;
import java.util.List;
import org.genericsystem.common.Generic;
import org.genericsystem.common.Root;
import org.genericsystem.cv.Img;
import org.genericsystem.cv.Zone;
import org.genericsystem.cv.Zones;
import org.genericsystem.cv.comparator.GetDataInModel.InitScript;
import org.genericsystem.cv.model.Doc;
import org.genericsystem.cv.model.Doc.DocInstance;
import org.genericsystem.cv.model.DocClass;
import org.genericsystem.cv.model.DocClass.DocClassInstance;
import org.genericsystem.cv.model.ImgFilter;
import org.genericsystem.cv.model.ZoneGeneric;
import org.genericsystem.cv.model.ZoneGeneric.ZoneInstance;
import org.genericsystem.cv.model.ZoneText;
import org.genericsystem.reactor.annotations.DependsOnModel;
import org.genericsystem.reactor.annotations.RunScript;
import org.genericsystem.reactor.appserver.ApplicationServer;
import org.genericsystem.reactor.appserver.Script;
import org.genericsystem.reactor.gscomponents.RootTagImpl;
import org.opencv.core.Core;
import org.opencv.core.Scalar;
import org.opencv.imgcodecs.Imgcodecs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The GetDataInModel class can analyze a batch of images and store the OCR text for
 * each zone and each document in GS.
 * <p>
 * The actual work is performed by {@link InitScript}, which is declared as the script of this
 * application through the {@link RunScript} annotation. The model classes required by the script
 * are declared through {@link DependsOnModel}.
 *
 * @author Pierrik Lassalas
 *
 */
@RunScript(InitScript.class)
@DependsOnModel({ Doc.class, ImgFilter.class, ZoneGeneric.class, ZoneText.class })
public class GetDataInModel extends RootTagImpl {

    private static final Logger log = LoggerFactory.getLogger(GetDataInModel.class);

    /**
     * Loads the OpenCV native library, then starts a simple generic application for this class
     * using the persistence path {@code /gs-cv_model}.
     *
     * @param mainArgs the command line arguments, forwarded unchanged to the application server
     */
    public static void main(String[] mainArgs) {
        System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        ApplicationServer.startSimpleGenericApp(mainArgs, GetDataInModel.class, "/gs-cv_model");
    }

    /**
     * Script that registers a document class, its zones and the known image filters in the engine,
     * then runs the OCR on every zone of every reference image, for every filter, and stores the
     * resulting text.
     */
    public static class InitScript implements Script {

        /**
         * Populates the model for the document type {@code id-fr-front}.
         * <p>
         * The steps are:
         * <ol>
         * <li>register the document class, the zones loaded from {@code classes/id-fr-front} and
         * the filter names, then flush the current cache;</li>
         * <li>for each {@code .png} file found in the {@code ref/} sub-folder: write an annotated
         * copy (zones and zone numbers drawn on it) to {@code src/main/resources/} under the
         * working directory, register the document, and store the OCR text of every zone for
         * every filter;</li>
         * <li>flush the current cache again.</li>
         * </ol>
         * If the {@code ref/} sub-folder cannot be listed, a warning is logged and step 2 and 3 are
         * skipped; the data flushed in step 1 is kept.
         *
         * @param engine the engine in which the model classes are looked up and the data is stored
         */
        @Override
        public void run(Root engine) {
            String docType = "id-fr-front";
            String imgClassDirectory = "classes/" + docType;
            log.info("imgClassDirectory = {} ", imgClassDirectory);

            // Get the necessary classes from the engine
            DocClass docClass = engine.find(DocClass.class);
            Generic doc = engine.find(Doc.class);
            ZoneText zoneText = engine.find(ZoneText.class);
            ImgFilter imgFilter = engine.find(ImgFilter.class);

            // Save the current document class
            DocClassInstance docClassInstance = docClass.addDocClass(docType);

            // Set all the filter names
            List<String> imgFilters = Arrays.asList("reality", "original", "abutaleb", "bernsen", "brink", "djvu",
                    "niblack", "otsu", "sauvola", "shading-subtraction", "tsai", "white-rohrer");

            // Load the accurate zones
            final Zones zones = Zones.load(imgClassDirectory);

            // Save the zones
            zones.getZones().forEach(z -> {
                log.info("Adding zone n° {}", z.getNum());
                docClassInstance.addZone(z.getNum(), z.getRect().x, z.getRect().y, z.getRect().width,
                        z.getRect().height);
            });

            // Save the filter names
            imgFilters.forEach(f -> {
                log.info("Adding filter : {} ", f);
                imgFilter.addImgFilter(f);
            });

            // Persist the changes
            engine.getCurrentCache().flush();

            // File#listFiles returns null (not an empty array) when the path is not a directory or
            // when an I/O error occurs, so this case must be handled before iterating.
            File refDirectory = new File(imgClassDirectory + "/ref/");
            File[] refFiles = refDirectory.listFiles((dir, name) -> name.endsWith(".png"));
            if (refFiles == null) {
                log.warn("Unable to list the reference images in {}: no document was processed",
                        refDirectory.getPath());
                return;
            }

            // Process each file in the subfolder "/ref/"
            // TODO : use a new cache after each iteration to avoid losing too much data when an error occurs
            for (File file : refFiles) {
                log.info("\nProcessing file: {}", file.getName());

                // Draw the image's zones + numbers
                Img originalImg = new Img(Imgcodecs.imread(file.getPath()));
                zones.draw(originalImg, new Scalar(0, 255, 0), 3);
                zones.writeNum(originalImg, new Scalar(0, 0, 255), 3);

                // Copy the images to the resources folder
                // TODO implement a filter mechanism to avoid creating
                // duplicates in a public folder
                Imgcodecs.imwrite(System.getProperty("user.dir") + "/src/main/resources/" + file.getName(),
                        originalImg.getSrc());

                // Save the current file (document)
                DocInstance docInstance = docClassInstance.addDoc(docClassInstance, doc, file.getName());

                // Process each zone
                zones.getZones().forEach(z -> {
                    log.info("Zone n° {}", z.getNum());
                    ZoneInstance zoneInstance = docClassInstance.getZone(z.getNum());

                    for (String filter : imgFilters) {
                        // "reality" is initialized with the OCR of the original picture only if
                        // no text has been stored yet, so an existing value is never overwritten
                        if ("reality".equals(filter) && null != zoneText.getZoneText(docInstance, zoneInstance,
                                imgFilter.getImgFilter(filter))) {
                            continue;
                        }
                        Img filteredImage = loadFilteredImage(imgClassDirectory, filter, file.getName());

                        // Get the OCR text
                        String ocrText = z.ocr(filteredImage);
                        log.info("filter {} => {}", filter, ocrText.trim());

                        // Add the text to the corresponding zone
                        zoneText.addZoneText(ocrText, docInstance, zoneInstance, imgFilter.getImgFilter(filter));
                    }

                    // Call the garbage collector to free the resources
                    // NOTE(review): presumably needed to release the native memory held by the
                    // images loaded above — confirm before removing.
                    System.gc();
                });
            }

            engine.getCurrentCache().flush();
        }

        /**
         * Reads the image on which the OCR must be run for the given filter.
         * <p>
         * The filters {@code original} and {@code reality} use the reference image itself
         * ({@code <imgClassDirectory>/ref/<fileName>}). Every other filter uses the pre-computed
         * image {@code <imgClassDirectory>/mask/<filter>/<name>-<filter>.png}, where {@code <name>}
         * is {@code fileName} with every occurrence of {@code .png} removed.
         *
         * @param imgClassDirectory the root folder of the document class
         * @param filter the name of the filter
         * @param fileName the name of the reference file, including its extension
         * @return the image read from the computed path
         */
        private static Img loadFilteredImage(String imgClassDirectory, String filter, String fileName) {
            if ("original".equals(filter) || "reality".equals(filter)) {
                return new Img(Imgcodecs.imread(imgClassDirectory + "/ref/" + fileName));
            }
            return new Img(Imgcodecs.imread(imgClassDirectory + "/mask/" + filter + "/"
                    + fileName.replace(".png", "") + "-" + filter + ".png"));
        }
    }
}