-
Notifications
You must be signed in to change notification settings - Fork 16
/
globalconfig.pxi
281 lines (205 loc) · 7.42 KB
/
globalconfig.pxi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
from pyxpdf.includes.GlobalParams cimport GlobalParams, globalParams, EndOfLineKind
from pyxpdf.includes.UnicodeMap cimport UnicodeMap
# NOTE: This class should always be a singleton:
# only one object of this class should exist, i.e. the
# global variable `Config`.
# This is because the destructor of xpdf's `GlobalParams` class
# frees the global builtin font tables, so more than one
# `_GlobalParamsConfig` instance will lead to a double free
# or a corruption error.
cdef class _GlobalParamsConfig:
    # Python-facing wrapper around an xpdf ``GlobalParams`` object.
    #
    # The instance owns the C++ object held in ``_global`` and publishes the
    # same pointer through the ``globalParams`` variable (see ``load_file``).
    # ``__dealloc__`` deletes the object and clears ``globalParams``.
    cdef:
        # xpdfrc path resolved in __cinit__; None when pyxpdf_data is missing
        object cfg_path
        # owned GlobalParams instance; NULL until load_file() has created one
        GlobalParams* _global
        # writable per-instance documentation slot
        public object __doc__

    cdef _set_defaults(self):
        # Apply pyxpdf's own defaults on top of a freshly built GlobalParams.
        # Only call after self._global has been initialised.
        # default text encoding
        self._global.setTextEncoding("UTF-8")

    cdef _get_default_xpdfrc(self):
        # Return ``pyxpdf_data.get_xpdfrc()`` when the optional
        # ``pyxpdf_data`` package can be imported, otherwise None.
        cdef:
            object pyxpdf_data
            object cfg = None
        try:
            import pyxpdf_data
            cfg = pyxpdf_data.get_xpdfrc()
        except ImportError:
            # optional dependency: fall back to no config file
            pass
        else:
            del pyxpdf_data
        return cfg

    def load_file(self, cfg_path=None):
        """Replace the active ``GlobalParams`` with a freshly built one.

        Parameters
        ----------
        cfg_path : str, optional
            Path of the ``xpdfrc`` file handed to the ``GlobalParams``
            constructor. When None, the constructor receives a NULL path.

        Raises
        ------
        MemoryError
            If the new ``GlobalParams`` pointer is NULL.
        """
        global globalParams
        if globalParams != NULL:
            del globalParams
            # Clear both pointers immediately: until the replacement exists
            # they would otherwise refer to freed memory, and a failure while
            # constructing the new object would make __dealloc__ delete the
            # same pointer a second time.
            # NOTE(review): xpdf code running inside the constructor may also
            # consult `globalParams` - confirm against the xpdf sources.
            globalParams = NULL
            self._global = NULL

        if cfg_path is None:
            self._global = new GlobalParams(<const char*>NULL)
        else:
            self._global = new GlobalParams(_chars(cfg_path))
        if self._global == NULL:
            raise MemoryError("Cannot create GlobalParamsConfig object.")

        self._set_defaults()
        globalParams = self._global

    def reset(self):
        """Reload the configuration from the path stored in ``cfg_path``."""
        self.load_file(self.cfg_path)

    def __cinit__(self):
        self._global = NULL
        self.cfg_path = self._get_default_xpdfrc()
        self.load_file(self.cfg_path)

    def __dealloc__(self):
        global globalParams
        # Unpublish the pointer before the object it refers to is destroyed.
        globalParams = NULL
        del self._global

    def setup_base_fonts(self, dir):
        """Forward `dir` to ``GlobalParams::setupBaseFonts``."""
        self._global.setupBaseFonts(_chars(dir))

    def add_font_file(self, font_name, file):
        """Register the font file `file` under the name `font_name`."""
        # GlobalParams free these GStrings in its destructor
        self._global.addFontFile(to_GString(font_name), to_GString(file))

    @property
    def base_dir(self):
        """Base directory reported by ``GlobalParams::getBaseDir``."""
        return GString_to_unicode(self._global.getBaseDir())

    @base_dir.setter
    def base_dir(self, dir):
        self._global.setBaseDir(_chars(dir))

    def map_name_to_unicode(self, char_name):
        """Return the result of ``GlobalParams::mapNameToUnicode``."""
        return self._global.mapNameToUnicode(_chars(char_name))

    @property
    def ps_paper_width(self):
        """PostScript paper width (int)."""
        return self._global.getPSPaperWidth()

    @ps_paper_width.setter
    def ps_paper_width(self, int width):
        self._global.setPSPaperWidth(width)

    @property
    def ps_paper_height(self):
        """PostScript paper height (int)."""
        return self._global.getPSPaperHeight()

    @ps_paper_height.setter
    def ps_paper_height(self, int height):
        self._global.setPSPaperHeight(height)

    @property
    def enable_freetype(self):
        """Whether FreeType is enabled (bool)."""
        return GBool_to_bool(self._global.getEnableFreeType())

    @enable_freetype.setter
    def enable_freetype(self, enable):
        # xpdf takes the textual values 'yes'/'no' here. Only a value that
        # compares equal to True enables the option; kept as-is on purpose
        # so existing callers see no change.
        self._global.setEnableFreeType('yes' if enable == True else 'no')

    @property
    def anti_alias(self):
        """Whether anti-aliasing is enabled (bool)."""
        return GBool_to_bool(self._global.getAntialias())

    @anti_alias.setter
    def anti_alias(self, enable):
        # same 'yes'/'no' convention as enable_freetype
        self._global.setAntialias('yes' if enable == True else 'no')

    @property
    def vector_anti_alias(self):
        """Whether vector anti-aliasing is enabled (bool)."""
        return GBool_to_bool(self._global.getVectorAntialias())

    @vector_anti_alias.setter
    def vector_anti_alias(self, enable):
        # same 'yes'/'no' convention as enable_freetype
        self._global.setVectorAntialias('yes' if enable == True else 'no')

    @property
    def text_encoding(self):
        """Name of the current text output encoding (str)."""
        cdef:
            unique_ptr[GString] gstr
        # unique_ptr takes ownership of the returned GString and frees it
        # when this getter returns.
        gstr.reset(self._global.getTextEncodingName())
        return GString_to_unicode(gstr.get())

    @text_encoding.setter
    def text_encoding(self, encoding):
        cdef UnicodeMap* umap
        self._global.setTextEncoding(_chars(encoding))
        # Probe the map once to verify that the encoding can be resolved.
        umap = self._global.getTextEncoding()
        if umap == NULL:
            # NOTE(review): the unresolved encoding name stays set on the
            # GlobalParams object after this error - confirm this is intended.
            raise XPDFConfigError(f"No UnicodeMap file associated with {encoding} found.")
        else:
            # release the reference obtained by the probe above
            umap.decRefCnt()

    @property
    def text_eol(self):
        """End-of-line convention: ``"unix"``, ``"dos"`` or ``"mac"``."""
        cdef EndOfLineKind eol = self._global.getTextEOL()
        if eol == EndOfLineKind.eolUnix:
            return "unix"
        elif eol == EndOfLineKind.eolDOS:
            return "dos"
        else:
            return "mac"

    @text_eol.setter
    def text_eol(self, eol):
        # Validate and forward the SAME lower-cased value, so that an input
        # such as "UNIX" is not accepted by the check and then passed on in
        # a different spelling.
        normalized = eol.lower()
        if normalized in (u'unix', u'mac', u'dos'):
            self._global.setTextEOL(_chars(normalized))
        else:
            raise XPDFConfigError(f"Invalid EOL type - {eol}.")

    @property
    def text_page_breaks(self):
        """Value of ``GlobalParams::getTextPageBreaks`` as a bool."""
        return GBool_to_bool(self._global.getTextPageBreaks())

    @text_page_breaks.setter
    def text_page_breaks(self, breaks):
        self._global.setTextPageBreaks(to_GBool(breaks))

    @property
    def text_keep_tiny(self):
        """Value of ``GlobalParams::getTextKeepTinyChars`` as a bool."""
        return GBool_to_bool(self._global.getTextKeepTinyChars())

    @text_keep_tiny.setter
    def text_keep_tiny(self, keep):
        self._global.setTextKeepTinyChars(to_GBool(keep))

    @property
    def print_commands(self):
        """Value of ``GlobalParams::getPrintCommands`` as a bool."""
        return GBool_to_bool(self._global.getPrintCommands())

    @print_commands.setter
    def print_commands(self, print_cmd):
        self._global.setPrintCommands(to_GBool(print_cmd))

    @property
    def error_quiet(self):
        """Value of ``GlobalParams::getErrQuiet`` as a bool."""
        return GBool_to_bool(self._global.getErrQuiet())

    @error_quiet.setter
    def error_quiet(self, quiet):
        self._global.setErrQuiet(to_GBool(quiet))

    @property
    def default_text_encoding(self):
        """``GlobalParams::defaultTextEncoding`` decoded as UTF-8 (read-only)."""
        return self._global.defaultTextEncoding.decode('UTF-8')
# The single module-level configuration object. It is created through
# __new__, which runs the extension type's __cinit__.
Config = _GlobalParamsConfig.__new__(_GlobalParamsConfig)

# User-facing documentation attached to the instance (numpydoc layout).
Config.__doc__ = \
"""
Global XPDF config object

Methods
-------
Config.reset
    Reset the global configuration to default.
Config.load_file(cfg_path)
    Load the settings from the `xpdfrc` file at the given `cfg_path`.
Config.add_font_file(font_name, file)
    Maps a PDF Font `font_name` to font from path `file`.
    The font files can be Type 1 (.pfa or .pfb) or TrueType
    (.ttf or .ttc)

Attributes
----------
Config.text_encoding : str
    Sets the encoding to use for text output. 'UTF-8', 'Latin1', 'ASCII7',
    'Symbol', 'ZapfDingbats', 'UCS-2' are predefined. For more encodings
    support install ``pyxpdf_data`` package (see :ref:`Installation`).
    (default is `UTF-8`)
Config.text_eol : {'unix', 'dos', 'mac'}
    Sets the end-of-line convention to use for text output. The
    options are

        unix = LF
        dos  = CR+LF
        mac  = CR

    (default, platform dependent)
Config.text_page_breaks : bool
    If set to `True`, text extraction will insert page breaks (form
    feed characters) between pages.
    (default is `True`)
Config.text_keep_tiny : bool
    If set to `True`, text extraction will keep all characters. If
    set to `False`, text extraction will discard tiny (smaller than 3
    point) characters after the first 50000 per page, avoiding
    extremely slow run times for PDF files that use special fonts to
    do shading or cross-hatching.
    (default is `True`)
Config.enable_freetype : bool
    Enables or disables use of FreeType (a TrueType/Type 1 font
    rasterizer).
    (default is `True`)
Config.anti_alias : bool
    Enables or disables font anti-aliasing in the PDF Output Devices.
    This option affects all font rasterizers.
    (default is `True`)
Config.vector_anti_alias : bool
    Enables or disables anti-aliasing of vector graphics in the PDF
    rasterizer.
    (default is `True`)
"""