forked from oyvindberg/PDFExtract
-
Notifications
You must be signed in to change notification settings - Fork 3
/
README
42 lines (31 loc) · 1.04 KB
/
README
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#######################################
# How to build PDFExtract from source
#######################################
# The commands below use git, svn, patch and mvn (Maven), so all four
# must be available on the PATH.
#
# 1. create a folder for the projects and cd to it
#
# 2. install TEI P5 model
#
git clone https://github.com/elacin/TEI-P5-Java-model.git
cd TEI-P5-Java-model/
# this checks out commit 29d668e (version 0.3), which is currently used by PDFExtract
git checkout 29d668e
mvn install
cd ..
#
# 3. fetch the PDFExtract sources
#    (must happen before step 4: the PDFBox patches applied there are
#    read from PDFExtract/parent/patch/)
#
git clone https://github.com/elacin/PDFExtract.git
#
# 4. install patched PDFBox
#
svn checkout https://svn.apache.org/repos/asf/pdfbox/trunk/ pdfbox
# apply patches (tested against pdfbox svn r1157684; if they no longer apply
# cleanly to trunk, try adding "-r 1157684" to the svn checkout above)
cd pdfbox
patch -p0 < ../PDFExtract/parent/patch/pdfbox_poms.patch
patch -p0 < ../PDFExtract/parent/patch/pdfbox-font-bounding-boxes.patch
patch -p0 < ../PDFExtract/parent/patch/pdfbox-drawer-visibility.patch
mvn install
cd ..
#
# 5. build PDFExtract
#
cd PDFExtract/parent
mvn -DskipTests=true assembly:assembly #yes, some cleanup of tests is in order
# the binary distribution will end up (relative to the folder from step 1) as
# PDFExtract/pdfextract-cli/target/pdfextract-cli-${VERSION}-bin.tar.bz2