Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Some wording and documentation changes

git-svn-id: https://pdfminerr.googlecode.com/svn/trunk/pdfminer@229 1aa58f4a-7d42-0410-adbc-911cccaed67c
commit f5aff374fcd22d50a68dfc2d44ce5f8acfde91ea 1 parent a0dd46b
yusuke.shinyama.dummy authored
Showing with 23 additions and 5 deletions.
  1. +1 −1  cmaprsrc/README.txt
  2. +18 −1 pdfminer/layout.py
  3. +4 −3 samples/README
View
2  cmaprsrc/README.txt
@@ -1,7 +1,7 @@
README.txt for cmaprsrc
This directory contains Adobe CMap resources. CMaps are required
-to decode text data written in Chinese, Japanese or Korean language.
+to decode text data written in CJK (Chinese, Japanese, Korean) languages.
CMap resources are now available freely from Adobe web site:
http://opensource.adobe.com/wiki/display/cmap/CMap+Resources
View
19 pdfminer/layout.py
@@ -559,15 +559,21 @@ class LTAnalyzer(LTContainer):
def analyze(self, laparams):
"""Perform the layout analysis."""
(textobjs, otherobjs) = self.get_textobjs()
+ # textobjs is a list of LTChar objects, i.e.
+ # it has all the individual characters in the page.
if not laparams or not textobjs: return
if laparams.writing_mode not in ('lr-tb', 'tb-rl'):
laparams.writing_mode = guess_wmode(textobjs)
if (laparams.writing_mode.startswith('tb-') or
laparams.writing_mode.startswith('bt-')):
+ # assemble them into vertical rows of text.
textboxes = self.build_textbox_vertical(textobjs, laparams)
+ # turn them into a tree.
top = self.group_textbox_tb_rl(textboxes, laparams)
else:
+ # assemble them into horizontal rows of text.
textboxes = self.build_textbox_horizontal(textobjs, laparams)
+ # turn them into a tree.
top = self.group_textbox_lr_tb(textboxes, laparams)
def assign_index(obj, i):
if isinstance(obj, LTTextBox):
@@ -635,7 +641,7 @@ def aligned(obj1, obj2):
# | |
# +------+
#
- # |<--->|
+ # |<-->|
# (line_overlap)
return ((min(obj1.width, obj2.width) * laparams.line_overlap < obj1.hoverlap(obj2)) and
(obj1.vdistance(obj2) < min(obj1.height, obj2.height) * laparams.char_margin))
@@ -656,6 +662,17 @@ def aligned(obj1, obj2):
def group_textbox_lr_tb(self, boxes, laparams):
def dist(obj1, obj2):
+ """A distance function between two TextBoxes.
+
+ Consider the bounding rectangle for obj1 and obj2.
+ Return its area less the areas of obj1 and obj2,
+ shown as 'www' below. This value may be negative.
+ +------+..........+
+ | obj1 |wwwwwwwwww:
+ +------+www+------+
+ :wwwwwwwwww| obj2 |
+ +..........+------+
+ """
return ((max(obj1.x1,obj2.x1) - min(obj1.x0,obj2.x0)) *
(max(obj1.y1,obj2.y1) - min(obj1.y0,obj2.y0)) -
(obj1.width*obj1.height + obj2.width*obj2.height))
View
7 samples/README
@@ -1,7 +1,8 @@
This directory contains sample PDF files.
-The files in nonfree/ subdirectory can be distributed freely
-but does not come with explicit licensing terms or source files.
+These files (including ones in nonfree/ subdirectory) can be
+distributed freely but do not come with explicit licensing
+terms or source files.
Here are the credits of the original files:
@@ -16,7 +17,7 @@ simple2.pdf:
jo.pdf:
Kenji Miyazawa (1896-1933, copyright expired)
Preface of "Haru to Shura"
- (File generated by LaTeX and dvi2pdfm)
+ (File generated from jo.tex by LaTeX and dvi2pdfm)
--
nonfree/dmca.pdf:
Please sign in to comment.
Something went wrong with that request. Please try again.