Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More tesseract examples #675

Merged
merged 4 commits into from
Jan 21, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@

package org.bytedeco.javacpp.samples.tesseract;

import static org.bytedeco.javacpp.lept.*;
import static org.bytedeco.javacpp.tesseract.*;

import java.io.File;
import java.net.URL;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;

/**
 * Recognizes a sample image symbol by symbol and, for every symbol, prints the
 * alternative choices offered by the classifier together with their confidence.
 *
 * <p>To run this program, you need to configure:
 * <ul>
 * <li>An environment variable pointing to the dictionaries installed on the system
 * TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li>
 * <li>An environment variable to tweak the Locale
 * LC_ALL=C</li>
 * </ul>
 *
 * @author Arnaud Jeansen
 */
public class IteratorOverClassifierChoicesExample {
    public static void main(String[] args) throws Exception {
        // Fail early with a clear message instead of handing "null/tessdata" to Init()
        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
        if (tessdataPrefix == null) {
            System.err.println("The TESSDATA_PREFIX environment variable is not set.");
            System.exit(1);
        }

        TessBaseAPI api = new TessBaseAPI();
        // Initialize tesseract-ocr with English, initializing the tessdata path from the standard ENV variable
        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            System.exit(1);
        }

        PIX image = null;
        try {
            // Open input image with leptonica library
            URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
            File file = Loader.cacheResource(url);
            image = pixRead(file.getAbsolutePath());
            if (image == null) {
                throw new IllegalStateException("Could not read image " + file.getAbsolutePath());
            }
            api.SetImage(image);

            // NOTE(review): the monitor and the iterators obtained below are never released
            // in this example -- confirm which delete functions the bindings expose for them.
            ETEXT_DESC recoc = TessMonitorCreate();
            if (api.Recognize(recoc) != 0) {
                throw new IllegalStateException("Recognition failed.");
            }

            ResultIterator ri = api.GetIterator();
            int pageIteratorLevel = RIL_SYMBOL;
            if (ri != null) {
                do {
                    BytePointer outText = ri.GetUTF8Text(pageIteratorLevel);
                    if (outText == null) {
                        // Nothing recognized at this position (e.g. empty page): skip it
                        continue;
                    }
                    float conf = ri.Confidence(pageIteratorLevel);
                    String symbolInformation = String.format("symbol: '%s'; \tconf: %.2f", outText.getString(), conf);
                    System.out.println(symbolInformation);

                    // The first choice shares the line prefix; later ones get extra indentation
                    boolean indent = false;
                    ChoiceIterator ci = TessResultIteratorGetChoiceIterator(ri);
                    do {
                        BytePointer choiceText = ci.GetUTF8Text();
                        if (choiceText != null) {
                            if (indent) {
                                System.out.print("\t\t");
                            }
                            System.out.print("\t-");
                            System.out.println(String.format("%s conf: %f", choiceText.getString(), ci.Confidence()));
                            indent = true;
                        }
                        // choiceText is deliberately not deallocated: the choice iterator hands
                        // out a pointer to its own internal buffer, unlike the result iterator.
                    } while (ci.Next());

                    outText.deallocate();
                } while (ri.Next(pageIteratorLevel));
            }
        } finally {
            // Destroy used object and release memory, even when recognition failed
            api.End();
            if (image != null) {
                pixDestroy(image);
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@

package org.bytedeco.javacpp.samples.tesseract;

import static org.bytedeco.javacpp.lept.*;
import static org.bytedeco.javacpp.tesseract.*;

import java.io.File;
import java.net.URL;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;
import org.bytedeco.javacpp.tesseract;

/**
 * Runs page layout analysis with orientation and script detection on a sample
 * image and prints the detected orientation, writing direction, textline order
 * and deskew angle.
 *
 * <p>To run this program, you need to configure:
 * <ul>
 * <li>An environment variable pointing to the dictionaries installed on the system
 * TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li>
 * <li>An environment variable to tweak the Locale
 * LC_ALL=C</li>
 * </ul>
 *
 * @author Arnaud Jeansen
 */
public class OrientationAndScriptDetectionExample {
    public static void main(String[] args) throws Exception {
        // Fail early with a clear message instead of handing "null/tessdata" to Init()
        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
        if (tessdataPrefix == null) {
            System.err.println("The TESSDATA_PREFIX environment variable is not set.");
            System.exit(1);
        }

        TessBaseAPI api = new TessBaseAPI();
        // Initialize tesseract-ocr with English, initializing the tessdata path from the standard ENV variable
        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            System.exit(1);
        }

        PIX image = null;
        try {
            // Open input image with leptonica library
            URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
            File file = Loader.cacheResource(url);
            image = pixRead(file.getAbsolutePath());
            if (image == null) {
                throw new IllegalStateException("Could not read image " + file.getAbsolutePath());
            }
            api.SetPageSegMode(PSM_AUTO_OSD);
            api.SetImage(image);

            // NOTE(review): the monitor and the page iterator obtained below are never released
            // in this example -- confirm which delete functions the bindings expose for them.
            tesseract.ETEXT_DESC reco = TessMonitorCreate();
            if (api.Recognize(reco) != 0) {
                throw new IllegalStateException("Recognition failed.");
            }

            tesseract.PageIterator iterator = api.AnalyseLayout();
            if (iterator == null) {
                // Layout analysis yields no iterator on error or for an empty page
                System.err.println("Layout analysis returned no result.");
            } else {
                // Single-element arrays act as out-parameters filled by Orientation()
                int[] orientation = new int[1];
                int[] writing_direction = new int[1];
                int[] textline_order = new int[1];
                float[] deskew_angle = new float[1];

                iterator.Orientation(orientation, writing_direction, textline_order, deskew_angle);
                String osdInformation = String.format("Orientation: %d;\nWritingDirection: %d\nTextlineOrder: %d\nDeskew angle: %.4f\n",
                        orientation[0], writing_direction[0], textline_order[0], deskew_angle[0]);
                System.out.println(osdInformation);
            }
        } finally {
            // Destroy used object and release memory, even when recognition failed
            api.End();
            if (image != null) {
                pixDestroy(image);
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@

package org.bytedeco.javacpp.samples.tesseract;

import static org.bytedeco.javacpp.lept.*;
import static org.bytedeco.javacpp.tesseract.*;

import java.io.File;
import java.net.URL;

import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.javacpp.Loader;
import org.bytedeco.javacpp.tesseract;

/**
 * Recognizes a sample image word by word and prints each word with its
 * confidence and bounding box.
 *
 * <p>To run this program, you need to configure:
 * <ul>
 * <li>An environment variable pointing to the dictionaries installed on the system
 * TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00</li>
 * <li>An environment variable to tweak the Locale
 * LC_ALL=C</li>
 * </ul>
 *
 * @author Arnaud Jeansen
 */
public class ResultIteratorExample {
    public static void main(String[] args) throws Exception {
        // Fail early with a clear message instead of handing "null/tessdata" to Init()
        String tessdataPrefix = System.getenv("TESSDATA_PREFIX");
        if (tessdataPrefix == null) {
            System.err.println("The TESSDATA_PREFIX environment variable is not set.");
            System.exit(1);
        }

        TessBaseAPI api = new TessBaseAPI();
        // Initialize tesseract-ocr with English, initializing the tessdata path from the standard ENV variable
        if (api.Init(tessdataPrefix + "/tessdata", "eng") != 0) {
            System.err.println("Could not initialize tesseract.");
            System.exit(1);
        }

        PIX image = null;
        try {
            // Open input image with leptonica library
            URL url = new URL("https://upload.wikimedia.org/wikipedia/commons/thumb/7/74/Computer_modern_sample.svg/1920px-Computer_modern_sample.svg.png");
            File file = Loader.cacheResource(url);
            image = pixRead(file.getAbsolutePath());
            if (image == null) {
                throw new IllegalStateException("Could not read image " + file.getAbsolutePath());
            }
            api.SetImage(image);

            // NOTE(review): the monitor and the result iterator obtained below are never released
            // in this example -- confirm which delete functions the bindings expose for them.
            tesseract.ETEXT_DESC recoc = TessMonitorCreate();
            if (api.Recognize(recoc) != 0) {
                throw new IllegalStateException("Recognition failed.");
            }

            tesseract.ResultIterator ri = api.GetIterator();
            int pageIteratorLevel = RIL_WORD;
            if (ri != null) {
                // Single-element arrays act as out-parameters; BoundingBox() overwrites them
                // on every call, so one set is enough for the whole loop.
                int[] x1 = new int[1], y1 = new int[1], x2 = new int[1], y2 = new int[1];
                do {
                    BytePointer outText = ri.GetUTF8Text(pageIteratorLevel);
                    if (outText == null) {
                        // Nothing recognized at this position (e.g. empty page): skip it
                        continue;
                    }
                    float conf = ri.Confidence(pageIteratorLevel);
                    ri.BoundingBox(pageIteratorLevel, x1, y1, x2, y2);
                    String riInformation = String.format("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n", outText.getString(), conf, x1[0], y1[0], x2[0], y2[0]);
                    System.out.println(riInformation);

                    outText.deallocate();
                } while (ri.Next(pageIteratorLevel));
            }
        } finally {
            // Destroy used object and release memory, even when recognition failed
            api.End();
            if (image != null) {
                pixDestroy(image);
            }
        }
    }
}