Skip to content

Commit

Permalink
* Add GetComponentImagesExample for Tesseract (pull #673)
Browse files Browse the repository at this point in the history
  • Loading branch information
ajeans authored and saudet committed Jan 19, 2019
1 parent fb79284 commit 577fe8c
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@

* Add `GetComponentImagesExample` for Tesseract ([pull #673](https://github.com/bytedeco/javacpp-presets/pull/673))
* Add presets for nGraph 0.11.1 ([pull #642](https://github.com/bytedeco/javacpp-presets/pull/642))
* Upgrade presets for ARPACK-NG 3.7.0
* Build OpenCV without UI when environment variable `HEADLESS=yes` ([pull #667](https://github.com/bytedeco/javacpp-presets/pull/667))
Expand Down
24 changes: 24 additions & 0 deletions tesseract/samples/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Standalone Maven build for the Tesseract sample programs. -->
<modelVersion>4.0.0</modelVersion>

<!-- Coordinates of this samples module. -->
<!-- NOTE(review): the version looks like <Tesseract version>-<JavaCPP version>; confirm against the presets' versioning scheme. -->
<groupId>org.bytedeco.javacpp-presets</groupId>
<artifactId>tesseract-samples</artifactId>
<version>4.0.0-1.4.4</version>
<name>JavaCPP Presets Samples for Tesseract</name>

<properties>
<!-- Compile the samples with Java 7 source and target levels. -->
<maven.compiler.target>1.7</maven.compiler.target>
<maven.compiler.source>1.7</maven.compiler.source>
</properties>

<dependencies>
<!-- The only dependency: the tesseract-platform artifact, resolved with the same groupId and version as this module. -->
<!-- NOTE(review): presumably the "-platform" artifact pulls in the native binaries for every supported platform; verify. -->
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tesseract-platform</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>

</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

package org.bytedeco.javacpp.samples.tesseract;

import java.io.File;
import java.net.URL;
import org.bytedeco.javacpp.*;
import static org.bytedeco.javacpp.lept.*;
import static org.bytedeco.javacpp.tesseract.*;

/**
 * Detects the text lines of a sample image and runs OCR on each of them separately,
 * printing the bounding box, the mean confidence and the recognized text of every line.
 *
 * <p>To run this program, you need to configure:
 * <ul>
 * <li>An environment variable pointing to the dictionaries installed on the system, e.g.
 * {@code TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00}. The {@code /tessdata}
 * sub-directory is appended to that value by this program.</li>
 * </ul>
 *
 * @author Arnaud Jeansen
 */
public class GetComponentImagesExample {

    /** Sample image that is downloaded (and cached locally) before being analyzed. */
    private static final String IMAGE_URL =
            "https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png";

    /**
     * Entry point of the example.
     *
     * @param args ignored
     * @throws Exception if the image cannot be downloaded or read, or if the layout
     *                   analysis does not return any component list
     */
    public static void main(String[] args) throws Exception {
        // Fail early with a clear message instead of silently building the path "null/tessdata"
        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
        if (tessdataPrefix == null || tessdataPrefix.isEmpty()) {
            System.err.println("The TESSDATA_PREFIX environment variable is not set.");
            System.exit(1);
        }

        TessBaseAPI api = new TessBaseAPI();
        // Initialize tesseract-ocr with English, initializing tessdata path with the standard ENV variable
        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            System.exit(1);
        }

        PIX image = null;
        try {
            // Open input image with leptonica library
            URL url = new URL(IMAGE_URL);
            File file = Loader.cacheResource(url);
            image = pixRead(file.getAbsolutePath());
            if (image == null) {
                throw new IllegalStateException("Could not read image: " + file.getAbsolutePath());
            }
            api.SetImage(image);

            // Lookup all component images.
            // A typed (empty) array is passed instead of a bare null for the last argument.
            int[] blockIds = {};
            BOXA boxes = api.GetComponentImages(RIL_TEXTLINE, true, null, blockIds);
            if (boxes == null) {
                throw new IllegalStateException("Could not retrieve the component images.");
            }
            // NOTE(review): the returned BOXA is never released here; it may need an explicit
            // destroy call from the leptonica API -- confirm the ownership rules.

            for (int i = 0; i < boxes.n(); i++) {
                // For each image box, OCR within its area
                BOX box = boxes.box(i);
                api.SetRectangle(box.x(), box.y(), box.w(), box.h());

                BytePointer outText = api.GetUTF8Text();
                try {
                    // Recognition may fail and yield no text; report an empty string in that case
                    String ocrResult = outText != null ? outText.getString() : "";
                    int conf = api.MeanTextConf();

                    String boxInformation = String.format(
                            "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
                            i, box.x(), box.y(), box.w(), box.h(), conf, ocrResult);
                    System.out.println(boxInformation);
                } finally {
                    // The text buffer is allocated natively and must be freed for every box
                    if (outText != null) {
                        outText.deallocate();
                    }
                }
            }
        } finally {
            // Destroy used object and release memory, even when an error occurred above
            api.End();
            if (image != null) {
                pixDestroy(image);
            }
        }
    }
}

0 comments on commit 577fe8c

Please sign in to comment.