Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Patch Tesseract 2.04 to overcome linker errors. (#176)

As pointed out in #176, there are three solutions to the "TessBaseAPI is not dllexport'ed" problem. This commit takes the approach of patching Tesseract 2.04, which allows the Tesseract plugin to use the same code under Windows and Linux.
  • Loading branch information...
commit 206aa359aca1a8ed58b2f41c00e838bfced9bd39 1 parent fa9ae19
Roland Richter authored
View
21 Patches/tesseract-2.04/i176-fix-export-TessBaseAPI.diff
@@ -0,0 +1,21 @@
+--- L:/tesseract-2.04/ccmain/baseapi.h Sat Apr 19 02:24:44 2008
++++ L:/tesseract-2.04-vc10/ccmain/baseapi.h Thu Jul 07 17:30:53 2011
+@@ -25,11 +25,17 @@
+ class IMAGE;
+ struct Pix;
+
++#ifdef TESSDLL_EXPORTS
++#define TESSDLL_API __declspec(dllexport)
++#else
++#define TESSDLL_API __declspec(dllimport)
++#endif
++
+ // Base class for all tesseract APIs.
+ // Specific classes can add ability to work on different inputs or produce
+ // different outputs.
+
+-class TessBaseAPI {
++class TESSDLL_API TessBaseAPI {
+ public:
+ // Set the value of an internal "variable" (of either old or new types).
+ // Supply the name of the variable and the value as a string, just as
View
32 Patches/tesseract-2.04/i176-fix-pair-ScrollView.diff
@@ -0,0 +1,32 @@
+--- L:/tesseract-2.04/viewer/scrollview.cpp Tue Jul 05 17:08:00 2011
++++ L:/tesseract-2.04-vc10/viewer/scrollview.cpp Wed Jun 03 20:57:59 2009
+@@ -133,7 +133,7 @@
+ cur->type);
+ std::pair<ScrollView*, SVEventType> awaiting_list_any(cur->window,
+ SVET_ANY);
+- std::pair<ScrollView*, SVEventType> awaiting_list_any_window(NULL,
++ std::pair<ScrollView*, SVEventType> awaiting_list_any_window((ScrollView*)NULL,
+ SVET_ANY);
+ mutex_waiting->Lock();
+ if (waiting_for_events.count(awaiting_list) > 0) {
+@@ -406,7 +406,7 @@
+ SVSemaphore* sem = new SVSemaphore();
+ std::pair<ScrollView*, SVEventType> ea(this, type);
+ mutex_waiting->Lock();
+- waiting_for_events[ea] = std::pair<SVSemaphore*, SVEvent*> (sem, NULL);
++ waiting_for_events[ea] = std::pair<SVSemaphore*, SVEvent*> (sem, (SVEvent*)NULL);
+ mutex_waiting->Unlock();
+ // Wait on it, but first flush.
+ stream_->Flush();
+@@ -424,9 +424,9 @@
+ SVEvent* ScrollView::AwaitEventAnyWindow() {
+ // Initialize the waiting semaphore.
+ SVSemaphore* sem = new SVSemaphore();
+- std::pair<ScrollView*, SVEventType> ea(NULL, SVET_ANY);
++ std::pair<ScrollView*, SVEventType> ea((ScrollView*)NULL, SVET_ANY);
+ mutex_waiting->Lock();
+- waiting_for_events[ea] = std::pair<SVSemaphore*, SVEvent*> (sem, NULL);
++ waiting_for_events[ea] = std::pair<SVSemaphore*, SVEvent*> (sem, (SVEvent*)NULL);
+ mutex_waiting->Unlock();
+ // Wait on it.
+ stream_->Flush();
View
130 Plugins/Tesseract/src/Tesseract.cpp
@@ -24,20 +24,8 @@
#include <Arguments/ByteImage.hpp>
#include <Arguments/StringValue.hpp>
-#ifdef WIN32
-
-typedef __int64 INT64;
-typedef unsigned __int64 UINT64;
-
-#include <tessdll.h>
-#include <unichar.h>
-
-#else
-
#include <baseapi.h>
-#endif
-
namespace pI {
namespace pIns {
@@ -80,53 +68,6 @@ Tesseract::Tesseract (Runtime& runtime) : Base (runtime) {
.SetDescription ("Text extracted from image using the Tesseract library."));
}
-#ifdef WIN32
-static wchar_t* make_unicode_string (const char* utf8) {
- int size = 0, out_index = 0;
- wchar_t* out;
-
- /* first calculate the size of the target string */
- int used = 0;
- int utf8_len = strlen (utf8);
-
- while (used < utf8_len) {
- int step = UNICHAR::utf8_step (utf8 + used);
-
- if (step == 0) {
- break;
- }
-
- used += step;
- ++size;
- }
-
- out = (wchar_t*) malloc ( (size + 1) * sizeof (wchar_t));
-
- if (out == NULL) {
- return NULL;
- }
-
- /* now convert to Unicode */
- used = 0;
-
- while (used < utf8_len) {
- int step = UNICHAR::utf8_step (utf8 + used);
-
- if (step == 0) {
- break;
- }
-
- UNICHAR ch (utf8 + used, step);
- out[out_index++] = ch.first_uni();
- used += step;
- }
-
- out[out_index] = 0;
-
- return out;
-}
-#endif
-
/*virtual*/ void Tesseract::Execute (Arguments& input_args, Arguments& output_args) {
CheckInputArguments (input_args, GetInputSignature());
@@ -147,78 +88,25 @@ static wchar_t* make_unicode_string (const char* utf8) {
int bpp = 8 * input.GetChannels();
unsigned char* imgPtr = & (input.GetCPtr()->data.ByteImage->data[0][0][0]);
- std::string result;
-
-#ifdef WIN32
- // The following cose follows closely main() from Tesseract's dllltest.cpp
- // Unfortunately, this is tuned to a Windows API ...
- TessDllAPI tessAPI (lang.c_str());
-
- tessAPI.BeginPageUpright (width, height, imgPtr, bpp);
-
- ETEXT_DESC* output = tessAPI.Recognize_all_Words();
-
- int j;
-
- for (int i = 0; i < output->count; i = j) {
- const EANYCODE_CHAR* ch = &output->text[i];
-
- for (int b = 0; b < ch->blanks; ++b) {
- result.push_back (' ');
- }
-
- unsigned char unistr[UNICHAR_LEN];
-
- for (j = i; j < output->count; j++) {
- const EANYCODE_CHAR* unich = &output->text[j];
-
- if (ch->left != unich->left || ch->right != unich->right ||
- ch->top != unich->top || ch->bottom != unich->bottom) {
- break;
- }
-
- unistr[j - i] = static_cast<unsigned char> (unich->char_code);
- }
-
- unistr[j - i] = '\0';
-
- wchar_t* utf16ch = make_unicode_string (reinterpret_cast<const char*> (unistr));
-
- char str[UNICHAR_LEN];
-
-#ifndef _UNICODE
-
- // If we aren't in _UNICODE mode, print string only if ascii.
- if (ch->char_code <= 0x7f) {
- sprintf (str, "%s", unistr);
- }
-
-#else
- // %S is a microsoft-special. Attempts to translate the Unicode
- // back to the current locale to print in 8 bit
- sprintf (str, "%S", utf16ch);
-#endif
- result.append (str);
- }
-
-#else
TessBaseAPI::InitWithLanguage (NULL, NULL, lang.c_str(), NULL, false, 0, NULL);
+
char* text =
TessBaseAPI::TesseractRect (imgPtr, channels, width * channels,
0, 0, width, height);
- result.assign (text);
- delete[] text;
-
- TessBaseAPI::End();
-#endif
-
GetRuntime().GetCRuntime()->FreeArgumentData (
GetRuntime().GetCRuntime(),
output_args[0].get());
pI::StringValue str (output_args[0]);
- str.SetData (result.c_str());
+ str.SetData (GetRuntime().GetCRuntime()->CopyString (GetRuntime().GetCRuntime(), text));
+
+ // WTF TessBaseAPI's documentation says that "[t]he recognized text is returned as a char* which [...]
+ // must be freed with the delete [] operator."
+ // This works under Linux, but triggers a debug assertion _BLOCK_TYPE_IS_VALID(pHead->nBlockUse)
+ // under Win/VC10. Why?
+ // delete[] text;
+ TessBaseAPI::End();
}
View
12 Plugins/Tesseract/src/Tesseract.hpp
@@ -32,10 +32,10 @@ class Tesseract: public pIn {
public:
- Tesseract(Runtime& runtime);
+ Tesseract (Runtime& runtime);
virtual ~Tesseract();
- DECLARE_VIRTUAL_COPY_CONSTRUCTOR(Tesseract)
+ DECLARE_VIRTUAL_COPY_CONSTRUCTOR (Tesseract)
virtual const pI_int GetpInVersion() const {
return 10100;
@@ -54,24 +54,24 @@ class Tesseract: public pIn {
}
virtual Arguments GetParameterSignature() const {
- return MEMBER_SIGNATURE(_parameters);
+ return MEMBER_SIGNATURE (_parameters);
}
virtual void Initialize (const Arguments& parameters);
virtual Arguments GetInputSignature() const {
- return MEMBER_SIGNATURE(_input_args);
+ return MEMBER_SIGNATURE (_input_args);
}
virtual Arguments GetOutputSignature() const {
- return MEMBER_SIGNATURE(_output_args);
+ return MEMBER_SIGNATURE (_output_args);
}
virtual void Execute (Arguments& input_args, Arguments& output_args);
protected:
- Arguments _parameters, _input_args, _output_args;
+ Arguments _parameters, _input_args, _output_args;
}; // class Tesseract: public pIn
View
2  cmake-win32-libs-vc10.conf
@@ -2,7 +2,7 @@ set(BOOST_ROOT "L:/boost_1_46_0" CACHE PATH "boost directory" FORCE)
set(CIMG_ROOT "L:/CImg-1.4.6" CACHE PATH "from http://cimg.sourceforge.net/" FORCE)
set(JSONC_ROOT "L:/json-c-0.9" CACHE PATH "from http://oss.metaparadigm.com/json-c/" FORCE)
set(POCO_ROOT "L:/poco-1.4.0-vc10" CACHE PATH "from http://pocoproject.org/" FORCE)
-set(TESSERACT_ROOT "L:/tesseract-2.04" CACHE PATH "from http://code.google.com/p/tesseract-ocr/" FORCE)
+set(TESSERACT_ROOT "L:/tesseract-2.04-vc10" CACHE PATH "from http://code.google.com/p/tesseract-ocr/" FORCE)
# By setting OpenCV_DIR, the find_package(OpenCV) is enabled:
set(OpenCV_DIR "L:/OpenCV-2.2.0-FLLL-2/build-win32-vc9" CACHE PATH "from http://opencv.willowgarage.com/" FORCE)
Please sign in to comment.
Something went wrong with that request. Please try again.