Skip to content
This repository has been archived by the owner on Nov 12, 2017. It is now read-only.

Commit

Permalink
Added Python 3 support
Browse files Browse the repository at this point in the history
  • Loading branch information
squidpickles committed Jun 26, 2014
1 parent 0ce3d8a commit 7aff611
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 10 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ tesserpy

A Python API for Tesseract

Requirements
------------
* Python 2.x >= 2.7, or Python 3.x >= 3.2
* NumPy >= 1.6
* Tesseract >= 3.02

Building
--------
It's the usual distutils dance -- run `python setup.py` for more details.
Expand Down
14 changes: 14 additions & 0 deletions compat.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/* Compatibility changes for compiling against Python 3.x */
/*
 * Maps the Python 2 C-API names used by the extension onto their Python 3
 * counterparts so the same source compiles against both major versions.
 * Python.h must already be included: PY_MAJOR_VERSION / PY_MINOR_VERSION come
 * from it, and an undefined macro evaluates to 0 in #if, which would silently
 * disable everything below.
 */

#if PY_MAJOR_VERSION >= 3
/* Marker tested elsewhere with #ifdef to select Python 3 only code paths. */
#define IS_PY3K
/* Defined as 0 so that OR-ing it into a flags word is a no-op.
 * NOTE(review): presumably used in tp_flags of the iterator type -- confirm. */
#define Py_TPFLAGS_HAVE_ITER 0
/* Python 3 has a single integer type; PyInt_* is spelled PyLong_*. */
#define PyInt_FromLong PyLong_FromLong
/* Text returned to Python becomes a unicode str object under Python 3. */
#define PyString_FromString PyUnicode_FromString
#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION >= 3
/* PyUnicode_AsUTF8 exists from Python 3.3 on and yields a `const char *`,
 * so callers must store the result in a `const char *`. */
#define PyString_AsString(STR) PyUnicode_AsUTF8((STR))
#else
/* XXX reference leak below: PyUnicode_AsUTF8String returns a new bytes
 * object that is never released by this macro.
 * NOTE(review): if PyUnicode_AsUTF8String fails, its NULL result is passed
 * straight to PyBytes_AsString -- confirm that this is safe. */
#define PyString_AsString(STR) PyBytes_AsString(PyUnicode_AsUTF8String((STR)))
#endif
#endif
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
import os

kVersionBase = '0.1dev'
kStringEncoding = 'utf-8'

version = kVersionBase + subprocess.check_output(['git', 'describe', '--dirty', '--always'])
version = kVersionBase + subprocess.check_output(['git', 'describe', '--dirty', '--always']).decode(kStringEncoding)

setup(
name = 'tesserpy',
Expand Down
60 changes: 51 additions & 9 deletions tesserpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <structmember.h>
#include <tesseract/baseapi.h>
#include <numpy/arrayobject.h>
#include "compat.hpp"

#pragma GCC diagnostic ignored "-Wwrite-strings"

Expand Down Expand Up @@ -556,9 +557,17 @@ static int PyTesseract_init(PyTesseract *self, PyObject *args, PyObject *kwargs)
tesseract::OcrEngineMode oem = tesseract::OEM_TESSERACT_ONLY;

static const char *kwlist[] = { "data_path", "language", "oem", NULL };
#ifdef IS_PY3K
PyObject *py_datapath;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|si", (char **)kwlist, PyUnicode_FSConverter, &py_datapath, &language, &oem)) {
#else
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|si", (char **)kwlist, &datapath, &language, &oem)) {
#endif
return -1;
}
#ifdef IS_PY3K
datapath = PyBytes_AsString(py_datapath);
#endif
self->page = NULL;
self->iterators = PyList_New(0);
int result = self->tess->Init(datapath, language, oem);
Expand All @@ -585,7 +594,7 @@ static PyObject* PyTesseract_clear(PyTesseract *self) {

static int PyTesseract_setattr(PyTesseract *self, PyObject *attr, PyObject *py_value) {
// attribute name must be a string, but value will be converted with str()
char *name = PyString_AsString(attr);
const char *name = PyString_AsString(attr);
if (!name) {
PyErr_SetString(PyExc_TypeError, "Attribute name must be a string");
return -1;
Expand All @@ -597,7 +606,7 @@ static int PyTesseract_setattr(PyTesseract *self, PyObject *attr, PyObject *py_v
return -1;
}

char *value = PyString_AsString(py_value_str);
const char *value = PyString_AsString(py_value_str);
bool result = self->tess->SetVariable(name, value);
Py_CLEAR(py_value_str);
if (!result) {
Expand All @@ -614,7 +623,7 @@ static PyObject* PyTesseract_getattr(PyTesseract *self, PyObject *attr) {
}
PyErr_Clear();
// attribute name must be a string
char *name = PyString_AsString(attr);
const char *name = PyString_AsString(attr);
if (!name) {
PyErr_SetString(PyExc_TypeError, "Attribute name is not a string");
return NULL;
Expand Down Expand Up @@ -791,37 +800,67 @@ static PyResultIterator* PyTesseract_blocks(PyTesseract *self) {
return iterator;
}

// Per-module state structure; its size is what the Python 3 module definition
// reports as m_size.
typedef struct {
// This module has no state
} PyModuleState;

// Module-level function table shared by the Python 2 and Python 3 init paths.
// It contains only the terminating sentinel, i.e. no module-level functions.
static PyMethodDef TesserPyMethods[] = {
{ NULL, NULL } // sentinel
};

#ifdef IS_PY3K
#define INITERROR return NULL

// Python 3 module definition; passed to PyModule_Create() in PyInit_tesserpy.
// Fields are initialised positionally, so their order must match the
// PyModuleDef layout of the CPython headers being compiled against.
static struct PyModuleDef TesserPyModuleDef {
PyModuleDef_HEAD_INIT,
"tesserpy", // m_name
PyDoc_STR("A Python API for Tesseract"), // m_doc
sizeof(PyModuleState), // m_size
TesserPyMethods, // m_methods
NULL, // m_reload
NULL, // m_traverse
NULL, // m_clear
NULL // m_free
};

PyMODINIT_FUNC PyInit_tesserpy(void) {
#else
#define INITERROR return

PyMODINIT_FUNC inittesserpy(void) {
#endif

PyBoundingBox_Type.tp_new = PyType_GenericNew;
if (PyType_Ready(&PyBoundingBox_Type) < 0) {
return;
INITERROR;
}

PyPageInfo_Type.tp_new = PyType_GenericNew;
if (PyType_Ready(&PyPageInfo_Type) < 0) {
return;
INITERROR;
}

PyResult_Type.tp_new = PyType_GenericNew;
if (PyType_Ready(&PyResult_Type) < 0) {
return;
INITERROR;
}

if (PyType_Ready(&PyResultIterator_Type) < 0) {
return;
INITERROR;
}

if (PyType_Ready(&PyTesseract_Type) < 0) {
return;
INITERROR;
}

#ifdef IS_PY3K
PyObject *module = PyModule_Create(&TesserPyModuleDef);
#else
PyObject *module = Py_InitModule("tesserpy", TesserPyMethods);
#endif

if (module == NULL) {
return;
INITERROR;
}

import_array();
Expand Down Expand Up @@ -884,4 +923,7 @@ PyMODINIT_FUNC inittesserpy(void) {
PyModule_AddIntConstant(module, "TEXTLINE_ORDER_LEFT_TO_RIGHT", tesseract::TEXTLINE_ORDER_LEFT_TO_RIGHT);
PyModule_AddIntConstant(module, "TEXTLINE_ORDER_RIGHT_TO_LEFT", tesseract::TEXTLINE_ORDER_RIGHT_TO_LEFT);
PyModule_AddIntConstant(module, "TEXTLINE_ORDER_TOP_TO_BOTTOM", tesseract::TEXTLINE_ORDER_TOP_TO_BOTTOM);
#ifdef IS_PY3K
return module;
#endif
}

0 comments on commit 7aff611

Please sign in to comment.