-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Greg Jurman
committed
Mar 19, 2013
1 parent
35052c0
commit 7aa422f
Showing
10 changed files
with
227 additions
and
107 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,5 @@ README.pdf | |
dist | ||
*.egg | ||
*.egg* | ||
*.o | ||
*.so |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,24 @@ | ||
from tesseract import * | ||
from .core import tr | ||
import ctypes | ||
|
||
__all__ = ["Tesseract"] | ||
|
||
class Tesseract(object):
    """Python-side owner of a native tesserwrap handle.

    The handle is obtained from ``tr.Tesserwrap_Init`` in ``__init__`` and
    handed back to ``tr.Tesserwrap_Destroy`` when the object is finalized.
    """

    def __init__(self, datadir="", lang="eng"):
        """Create the native object.

        Parameters
        ----------
        datadir : str, optional
            Data directory forwarded to ``Tesserwrap_Init``. Must be ASCII.
        lang : str, optional
            Language name forwarded to ``Tesserwrap_Init``. Must be ASCII.
        """
        # str.encode works on both Python 2 and 3; the two-argument
        # bytes() constructor only exists on Python 3.
        self.handle = tr.Tesserwrap_Init(
            datadir.encode("ascii"),
            lang.encode("ascii"))

    def __del__(self):
        """Release the native handle at most once.

        Does nothing when ``__init__`` never assigned a handle or when the
        library binding is no longer reachable.
        """
        # __init__ may have raised before self.handle was assigned.
        handle = getattr(self, "handle", None)
        if not handle:
            return
        # Forget the handle first so a repeated call can never free it twice.
        self.handle = None
        try:
            destroy = tr.Tesserwrap_Destroy
        except (NameError, AttributeError):
            # The binding is gone (for example module globals already
            # cleared during interpreter shutdown); nothing to call.
            return
        destroy(handle)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
import atexit, os, re, sys | ||
from ctypes import * | ||
from ctypes.util import find_library | ||
|
||
import distutils | ||
def get_shared_lib_extension(is_python_ext=False):
    """Return the correct file extension for shared libraries.

    Parameters
    ----------
    is_python_ext : bool, optional
        Whether the shared library is a Python extension.  Default is False.

    Returns
    -------
    so_ext : str
        The shared library extension, or '' when the interpreter's build
        configuration provides none.

    Notes
    -----
    For Python shared libs, `so_ext` will typically be '.so' on Linux and OS X,
    and '.pyd' on Windows.  For Python >= 3.2 `so_ext` has a tag prepended on
    POSIX systems according to PEP 3149.  When `is_python_ext` is False that
    tag (the 'SOABI' config var) is stripped from the result.
    """
    # Imported locally: a bare ``import distutils`` does not load the
    # ``distutils.sysconfig`` submodule, while the stdlib ``sysconfig``
    # module exposes the same build configuration directly.
    import sysconfig

    config = sysconfig.get_config_vars()
    # 'SO' is the legacy name of this variable; try 'EXT_SUFFIX' first and
    # fall back to 'SO' for interpreters that only define the old name.
    so_ext = config.get('EXT_SUFFIX') or config.get('SO') or ''

    # fix long extension for Python >=3.2, see PEP 3149.
    soabi = config.get('SOABI')
    if not is_python_ext and soabi:
        # Does nothing unless the SOABI tag is actually part of the suffix.
        so_ext = so_ext.replace('.' + soabi, '', 1)

    return so_ext
|
||
def load_library(libname, loader_path):
    """Load a shared library via ctypes and return the loaded library.

    Parameters
    ----------
    libname : str
        Library file name without its extension.
    loader_path : str
        A directory, or a file whose containing directory is used.  The
        library is searched for in the PARENT of that directory.

    Returns
    -------
    The object returned by ``ctypes.cdll[path]`` for the first candidate
    file name that loads.

    Raises
    ------
    OSError
        If none of the candidate file names can be loaded; the error of the
        last attempt is re-raised.
    """
    so_ext = get_shared_lib_extension()
    candidates = [libname + so_ext]
    # Compare version tuples: as strings, '3.10'[:3] == '3.1' sorts before
    # '3.2' and would wrongly disable this branch on Python 3.10+.
    if sys.version_info >= (3, 2):
        # For Python >= 3.2 a tag may be added to lib extension
        # (platform dependent).  If we find such a tag, try both with
        # and without it, tagged name first.
        tagged_ext = get_shared_lib_extension(is_python_ext=True)
        if tagged_ext != so_ext:
            candidates.insert(0, libname + tagged_ext)

    loader_path = os.path.abspath(loader_path)
    if os.path.isdir(loader_path):
        libdir = loader_path
    else:
        libdir = os.path.dirname(loader_path)

    # HACK: look one level above the resolved directory.
    # NOTE(review): presumably that is where the build drops the shared
    # library -- confirm against the build script.
    libdir = os.path.abspath(os.path.join(libdir, os.pardir))

    # Need to save exception when using Python 3k, see PEP 3110.
    last_error = None
    for name in candidates:
        try:
            return cdll[os.path.join(libdir, name)]
        except OSError as e:
            last_error = e
    raise last_error
|
||
# Bind the native wrapper library at import time; the search starts from
# the directory containing this module (see load_library).
tr = load_library('libtesserwrap', os.path.dirname(__file__))

# Tesserwrap_Init: two C strings in, a pointer-sized handle out.  The
# explicit c_void_p restype replaces ctypes' default int return type.
tr.Tesserwrap_Init.argtypes = [c_char_p, c_char_p]
tr.Tesserwrap_Init.restype = c_void_p

# Tesserwrap_Destroy: takes the handle, returns nothing.
tr.Tesserwrap_Destroy.restype = None
tr.Tesserwrap_Destroy.argtypes = [c_void_p]
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#include "tesseract_ext.h" | ||
|
||
// Creates a wrapper that does not yet own an image buffer.
TessBaseAPIExt::TessBaseAPIExt(void)
    : picture(NULL)
{
}
|
||
// Shuts the engine down and releases the image copy made by SetImage.
TessBaseAPIExt::~TessBaseAPIExt(void)
{
    // End() runs before the buffer is freed: SetImage handed `picture` to the
    // base class, which may still reference it until it is shut down.
    // NOTE(review): buffer-lifetime rule taken from the Tesseract API docs --
    // confirm for the Tesseract version in use.
    this->End();

    // delete[] on a null pointer is a no-op, so no guard is needed.
    delete [] picture;
    picture = NULL;
}
|
||
const char* TessBaseAPIExt::TesseractRect(const unsigned char *data, | ||
int bytes_per_pixel, int bytes_per_line, | ||
int left, int top, int width, int height) | ||
{ | ||
return super::TesseractRect(data, bytes_per_pixel, bytes_per_line, | ||
left, top, width, height); | ||
|
||
} | ||
|
||
void TessBaseAPIExt::SetImage(const unsigned char *data, uint64_t size, | ||
uint64_t width, uint64_t height) | ||
{ | ||
if(picture) delete [] picture; | ||
picture = new unsigned char[size]; | ||
std::memcpy(picture, data, size); | ||
super::SetImage(picture, width, height, 1, width); | ||
this->SetRectangle(0, 0, width, height); | ||
} | ||
|
||
const char *TessBaseAPIExt::GetUTF8Text(void) | ||
{ | ||
return super::GetUTF8Text(); | ||
} | ||
|
||
void TessBaseAPIExt::GetRectangle(uint64_t *left, uint64_t *top, uint64_t *width, uint64_t *height) | ||
{ | ||
(*left) = this->rect_left_; | ||
(*top) = this->rect_top_; | ||
(*width) = this->rect_width_; | ||
(*height) = this->rect_height_; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#include <tesseract/baseapi.h> | ||
#include <tesseract/publictypes.h> | ||
#include <stdint.h> | ||
#include <cstring> | ||
|
||
class TessBaseAPIExt : public tesseract::TessBaseAPI | ||
{ | ||
private: | ||
unsigned char *picture; | ||
typedef tesseract::TessBaseAPI super; | ||
|
||
public: | ||
TessBaseAPIExt(void); | ||
~TessBaseAPIExt(void); // Default destructor | ||
const char* TesseractRect(const unsigned char *data, | ||
int bytes_per_pixel, int bytes_per_line, | ||
int left, int top, int width, int height); | ||
void GetRectangle(uint64_t *, uint64_t *, uint64_t *, uint64_t *); | ||
void SetImage(const unsigned char *data, uint64_t size, uint64_t width, uint64_t height); | ||
const char * GetUTF8Text(void); | ||
}; | ||
|
||
// Handle type: a plain pointer to a TessBaseAPIExt instance.
typedef TessBaseAPIExt *TessH;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,51 @@ | ||
#include "tesseract_wrap.h" | ||
#include <cstring> | ||
#include <iostream> | ||
|
||
TessBaseAPIExt::TessBaseAPIExt(void) | ||
:picture(NULL){} | ||
|
||
TessBaseAPIExt::~TessBaseAPIExt(void) | ||
TESSERWRAP_CAPI TessH Tesserwrap_Init(const char *datadir, const char *lang) | ||
{ | ||
if(picture) delete [] picture; | ||
this->End(); | ||
TessH h = new TessBaseAPIExt(); | ||
h->Init(datadir, lang); | ||
return (TessH) h; | ||
} | ||
|
||
const char* TessBaseAPIExt::TesseractRect(string data, | ||
int bytes_per_pixel, int bytes_per_line, | ||
int left, int top, int width, int height) | ||
TESSERWRAP_CAPI void Tesserwrap_Destroy(TessH tesserwrap) | ||
{ | ||
return super::TesseractRect((const unsigned char*)data.c_str(), bytes_per_pixel, bytes_per_line, | ||
left, top, width, height); | ||
|
||
TessBaseAPIExt *api = (TessBaseAPIExt*) tesserwrap; | ||
if (api) delete api; | ||
} | ||
|
||
void TessBaseAPIExt::SetImage(string data, uint64_t w, uint64_t h) | ||
TESSERWRAP_CAPI void Tesserwrap_GetRectangle(TessH tesserwrap, | ||
uint64_t *left, uint64_t *top, | ||
uint64_t *width, uint64_t *height) | ||
{ | ||
if(picture) delete [] picture; | ||
picture = new unsigned char[data.length()]; | ||
std::memcpy(picture, (unsigned char*)data.c_str(), data.length()); | ||
super::SetImage(picture, (int)w, (int)h, 1, (int)w); | ||
this->SetRectangle(0, 0, w, h); | ||
TessBaseAPIExt *api = (TessBaseAPIExt*) tesserwrap; | ||
api->GetRectangle(left, top, width, height); | ||
} | ||
|
||
string TessBaseAPIExt::GetUTF8Text(void) | ||
TESSERWRAP_CAPI void Tesserwrap_SetRecangle(TessH tesserwrap, | ||
uint64_t *left, uint64_t *top, | ||
uint64_t *width, uint64_t *height) | ||
{ | ||
return string(super::GetUTF8Text()); | ||
TessBaseAPIExt *api = (TessBaseAPIExt*) tesserwrap; | ||
api->SetRectangle(*left, *top, *width, *height); | ||
} | ||
|
||
void TessBaseAPIExt::GetRectangle(uint64_t **rect) | ||
TESSERWRAP_CAPI void Tesserwrap_SetImage(TessH tesserwrap, | ||
const unsigned char *picture, uint64_t size, uint64_t width, uint64_t height) | ||
{ | ||
(*rect) = new uint64_t[4]; | ||
(*rect)[0] = this->rect_left_; | ||
(*rect)[1] = this->rect_top_; | ||
(*rect)[2] = this->rect_width_; | ||
(*rect)[3] = this->rect_height_; | ||
TessBaseAPIExt *api = (TessBaseAPIExt*) tesserwrap; | ||
api->SetImage(picture, size, width, height); | ||
} | ||
|
||
|
||
|
||
|
||
TESSERWRAP_CAPI TessH Init_Tesserwrap(const char *datadir, const char *lang) | ||
TESSERWRAP_CAPI void Tesserwrap_SetPageSegMode(TessH tesserwrap, | ||
tesseract::PageSegMode pageseg) | ||
{ | ||
TessH h = new TessBaseAPIExt(); | ||
h->Init(datadir, lang); | ||
return (TessH) h; | ||
TessBaseAPIExt *api = (TessBaseAPIExt*) tesserwrap; | ||
api->SetPageSegMode(pageseg); | ||
} | ||
|
||
TESSERWRAP_CAPI void Destroy_Tesserwrap(TessH tesserwrap) | ||
TESSERWRAP_CAPI tesseract::PageSegMode Tesserwrap_GetPageSegMode(TessH tesserwrap) | ||
{ | ||
TessBaseAPIExt *api = (TessBaseAPIExt*) tesserwrap; | ||
if (api) delete api; | ||
return api->GetPageSegMode(); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,38 +1,37 @@ | ||
#include <tesseract/baseapi.h> | ||
#include <string> | ||
#include <stdint.h> | ||
#include "tesseract_ext.h" | ||
|
||
using namespace tesseract; | ||
using namespace std; | ||
|
||
#if defined(USE_GCC_VISIBILITY_FLAG) | ||
# define TESSERWRAP_CAPI __attribute__ ((visibility("default"))) | ||
# else | ||
# define TESSERWRAP_CAPI | ||
# endif | ||
#ifdef __cplusplus | ||
# define TR_C_START extern "C" { | ||
# define TR_C_END } | ||
#else | ||
# define TR_C_START | ||
# define TR_C_END | ||
#endif | ||
#define TESSERWRAP_CAPI __attribute__ ((visibility("default"))) | ||
|
||
TR_C_START | ||
|
||
enum PILImageFormat | ||
{ | ||
L = 0, RGB=3, RGBA=4 | ||
}; | ||
|
||
class TessBaseAPIExt : public TessBaseAPI | ||
{ | ||
private: | ||
unsigned char *picture; | ||
typedef TessBaseAPI super; | ||
|
||
public: | ||
TessBaseAPIExt(void); | ||
~TessBaseAPIExt(void); // Default destructor | ||
const char* TesseractRect(string data, | ||
int bytes_per_pixel, int bytes_per_line, | ||
int left, int top, int width, int height); | ||
void GetRectangle(uint64_t **); | ||
void SetImage(string data, uint64_t w, uint64_t h); | ||
string GetUTF8Text(void); | ||
}; | ||
typedef TessBaseAPIExt* TessH; | ||
|
||
typedef TessBaseAPIExt *TessH; | ||
TESSERWRAP_CAPI TessH Tesserwrap_Init(const char *datadir, const char *lang); | ||
TESSERWRAP_CAPI void Tesserwrap_Destroy(TessH tesserwrap); | ||
TESSERWRAP_CAPI void Tesserwrap_GetRectangle(TessH tesserwrap, | ||
uint64_t *left, uint64_t *top, | ||
uint64_t *width, uint64_t *height); | ||
TESSERWRAP_CAPI void Tesserwrap_SetRecangle(TessH tesserwrap, | ||
uint64_t *left, uint64_t *top, | ||
uint64_t *width, uint64_t *height); | ||
TESSERWRAP_CAPI void Tesserwrap_SetImage(TessH tesserwrap, | ||
const unsigned char *picture, uint64_t size, uint64_t width, uint64_t height); | ||
TESSERWRAP_CAPI void Tesserwrap_SetPageSegMode(TessH tesserwrap, | ||
tesseract::PageSegMode pageseg); | ||
TESSERWRAP_CAPI tesseract::PageSegMode Tesserwrap_GetPageSegMode(TessH tesserwrap); | ||
|
||
TR_C_END |
This file was deleted.
Oops, something went wrong.